]> git.ipfire.org Git - thirdparty/mdadm.git/blob - super-intel.c
imsm: Implement recover_backup_imsm() for imsm metadata
[thirdparty/mdadm.git] / super-intel.c
1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2008 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define HAVE_STDINT_H 1
21 #include "mdadm.h"
22 #include "mdmon.h"
23 #include "sha1.h"
24 #include "platform-intel.h"
25 #include <values.h>
26 #include <scsi/sg.h>
27 #include <ctype.h>
28 #include <dirent.h>
29
/* MPB == Metadata Parameter Block */
#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
/* metadata version strings; each introduces the feature named in the macro */
#define MPB_VERSION_RAID0 "1.0.00"
#define MPB_VERSION_RAID1 "1.1.00"
#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
#define MPB_VERSION_RAID5 "1.2.02"
#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
#define MPB_VERSION_CNG "1.2.06"
#define MPB_VERSION_ATTRIBS "1.3.00"
#define MAX_SIGNATURE_LENGTH 32
#define MAX_RAID_SERIAL_LEN 16

/* feature/capability bits advertised in mpb->attributes */
#define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
#define MPB_ATTRIB_PM __cpu_to_le32(0x40000000)
#define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
#define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
#define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
#define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
#define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
#define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
#define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)

#define MPB_SECTOR_CNT 2210
#define IMSM_RESERVED_SECTORS 4096
#define SECT_PER_MB_SHIFT 11	/* 512-byte sectors per MiB = 1 << 11 */
57
/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
/* on-disk per-disk record in the MPB disk table (offsets for disk[0]) */
struct imsm_disk {
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks;		 /* 0xE8 - 0xEB total blocks (32-bit) */
	__u32 scsi_id;			 /* 0xEC - 0xEF scsi ID */
#define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
#define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
	__u32 status;			 /* 0xF0 - 0xF3 */
	__u32 owner_cfg_num; /* which config 0,1,2... owns this disk */
#define	IMSM_DISK_FILLERS	4
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xF4 - 0x107 MPB_DISK_FILLERS for future expansion */
};
72
/* RAID map configuration infos. */
struct imsm_map {
	__u32 pba_of_lba0;	/* start address of partition */
	__u32 blocks_per_member;/* blocks per member */
	__u32 num_data_stripes;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2
#define IMSM_T_STATE_FAILED 3
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  num_domains;	/* number of parity domains */
	__u8  failed_disk_num;	/* valid only when state is degraded */
	__u8  ddf;
	__u32 filler[7];	/* expansion area */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 */
} __attribute__ ((packed));
98
/* per-volume state; holds one map (two during a migration) */
struct imsm_vol {
	__u32 curr_migr_unit;
	__u32 checkpoint_id;	/* id to access curr_migr_unit */
	__u8  migr_state;	/* Normal or Migrating */
#define MIGR_INIT 0
#define MIGR_REBUILD 1
#define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
#define MIGR_GEN_MIGR 3
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
	__u8  migr_type;	/* Initializing, Rebuilding, ... */
	__u8  dirty;
	__u8  fs_state;		/* fast-sync state for CnG (0xff == disabled) */
	__u16 verify_errors;	/* number of mismatches */
	__u16 bad_blocks;	/* number of bad blocks during verify */
	__u32 filler[4];
	struct imsm_map map[1];
	/* here comes another one if migr_state */
} __attribute__ ((packed));
118
/* one RAID volume (subarray) as stored in the MPB */
struct imsm_dev {
	__u8  volume[MAX_RAID_SERIAL_LEN];
	__u32 size_low;
	__u32 size_high;
#define DEV_BOOTABLE		__cpu_to_le32(0x01)
#define DEV_BOOT_DEVICE		__cpu_to_le32(0x02)
#define DEV_READ_COALESCING	__cpu_to_le32(0x04)
#define DEV_WRITE_COALESCING	__cpu_to_le32(0x08)
#define DEV_LAST_SHUTDOWN_DIRTY	__cpu_to_le32(0x10)
#define DEV_HIDDEN_AT_BOOT	__cpu_to_le32(0x20)
#define DEV_CURRENTLY_HIDDEN	__cpu_to_le32(0x40)
#define DEV_VERIFY_AND_FIX	__cpu_to_le32(0x80)
#define DEV_MAP_STATE_UNINIT	__cpu_to_le32(0x100)
#define DEV_NO_AUTO_RECOVERY	__cpu_to_le32(0x200)
#define DEV_CLONE_N_GO		__cpu_to_le32(0x400)
#define DEV_CLONE_MAN_SYNC	__cpu_to_le32(0x800)
#define DEV_CNG_MASTER_DISK_NUM	__cpu_to_le32(0x1000)
	__u32 status;		/* Persistent RaidDev status */
	__u32 reserved_blocks;	/* Reserved blocks at beginning of volume */
	__u8  migr_priority;
	__u8  num_sub_vols;
	__u8  tid;
	__u8  cng_master_disk;
	__u16 cache_policy;
	__u8  cng_state;
	__u8  cng_sub_state;
#define IMSM_DEV_FILLERS 10
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));
149
/* the MPB anchor: fixed header followed by variable-length tables */
struct imsm_super {
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;		/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;			/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;		/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;		/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;		/* 0x30 - 0x33 in bytes */
	__u32 attributes;		/* 0x34 - 0x37 */
	__u8 num_disks;			/* 0x38 Number of configured disks */
	__u8 num_raid_devs;		/* 0x39 Number of configured volumes */
	__u8 error_log_pos;		/* 0x3A */
	__u8 fill[1];			/* 0x3B */
	__u32 cache_size;		/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;		/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;		/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;		/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));
172
#define BBM_LOG_MAX_ENTRIES 254

/* one remapped (or unreadable) bad-block region */
struct bbm_log_entry {
	__u64 defective_block_start;
#define UNREADABLE 0xFFFFFFFF
	__u32 spare_block_offset;
	__u16 remapped_marked_count;
	__u16 disk_ordinal;
} __attribute__ ((__packed__));

/* Bad Block Management log; see the "here comes BBM logs" trailer of
 * struct imsm_super
 */
struct bbm_log {
	__u32 signature;		/* 0xABADB10C */
	__u32 entry_count;
	__u32 reserved_spare_block_count; /* 0 */
	__u32 reserved;			/* 0xFFFF */
	__u64 first_spare_lba;
	struct bbm_log_entry mapped_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
191
192
#ifndef MDASSEMBLE
/* printable names for the imsm_map map_state values */
static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
#endif

#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209

#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */

#define UNIT_SRC_NORMAL     0   /* Source data for curr_migr_unit must
				 * be recovered using srcMap */
#define UNIT_SRC_IN_CP_AREA 1   /* Source data for curr_migr_unit has
				 * already been migrated and must
				 * be recovered from checkpoint area */
/* on-disk record used to checkpoint and restart a general migration */
struct migr_record {
	__u32 rec_status;	    /* Status used to determine how to restart
				     * migration in case it aborts
				     * in some fashion */
	__u32 curr_migr_unit;	    /* 0..numMigrUnits-1 */
	__u32 family_num;	    /* Family number of MPB
				     * containing the RaidDev
				     * that is migrating */
	__u32 ascending_migr;	    /* True if migrating in increasing
				     * order of lbas */
	__u32 blocks_per_unit;	    /* Num disk blocks per unit of operation */
	__u32 dest_depth_per_unit;  /* Num member blocks each destMap
				     * member disk
				     * advances per unit-of-operation */
	__u32 ckpt_area_pba;	    /* Pba of first block of ckpt copy area */
	__u32 dest_1st_member_lba;  /* First member lba on first
				     * stripe of destination */
	__u32 num_migr_units;	    /* Total num migration units-of-op */
	__u32 post_migr_vol_cap;    /* Size of volume after
				     * migration completes */
	__u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
	__u32 ckpt_read_disk_num;   /* Which member disk in destSubMap[0] the
				     * migration ckpt record was read from
				     * (for recovered migrations) */
} __attribute__ ((__packed__));
231
232 static __u8 migr_type(struct imsm_dev *dev)
233 {
234 if (dev->vol.migr_type == MIGR_VERIFY &&
235 dev->status & DEV_VERIFY_AND_FIX)
236 return MIGR_REPAIR;
237 else
238 return dev->vol.migr_type;
239 }
240
241 static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
242 {
243 /* for compatibility with older oroms convert MIGR_REPAIR, into
244 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
245 */
246 if (migr_type == MIGR_REPAIR) {
247 dev->vol.migr_type = MIGR_VERIFY;
248 dev->status |= DEV_VERIFY_AND_FIX;
249 } else {
250 dev->vol.migr_type = migr_type;
251 dev->status &= ~DEV_VERIFY_AND_FIX;
252 }
253 }
254
/* Number of 512-byte sectors needed to hold @bytes (rounded up). */
static unsigned int sector_count(__u32 bytes)
{
	return (bytes + 511) / 512;
}
259
/* number of 512-byte sectors occupied by the on-disk MPB */
static unsigned int mpb_sectors(struct imsm_super *mpb)
{
	return sector_count(__le32_to_cpu(mpb->mpb_size));
}
264
/* list node pairing a parsed imsm_dev with its index in the anchor */
struct intel_dev {
	struct imsm_dev *dev;
	struct intel_dev *next;
	unsigned index;
};

/* one RAID controller (HBA) the metadata disks are attached to */
struct intel_hba {
	enum sys_dev_type type;
	char *path;
	char *pci_id;	/* points into 'path': component after the last '/' */
	struct intel_hba *next;
};

/* pending disk management operation (see struct dl and disk_mgmt_list) */
enum action {
	DISK_REMOVE = 1,
	DISK_ADD
};
/* internal representation of IMSM metadata */
struct intel_super {
	union {
		void *buf; /* O_DIRECT buffer for reading/writing metadata */
		struct imsm_super *anchor; /* immovable parameters */
	};
	union {
		void *migr_rec_buf; /* buffer for I/O operations */
		struct migr_record *migr_rec; /* migration record */
	};
	size_t len; /* size of the 'buf' allocation */
	void *next_buf; /* for realloc'ing buf from the manager */
	size_t next_len;
	int updates_pending; /* count of pending updates for mdmon */
	int current_vol; /* index of raid device undergoing creation */
	__u32 create_offset; /* common start for 'current_vol' */
	__u32 random; /* random data for seeding new family numbers */
	struct intel_dev *devlist;
	/* per-physical-disk state tracked by mdadm/mdmon */
	struct dl {
		struct dl *next;
		int index;
		__u8 serial[MAX_RAID_SERIAL_LEN];
		int major, minor;
		char *devname;
		struct imsm_disk disk;
		int fd;
		int extent_cnt;
		struct extent *e; /* for determining freespace @ create */
		int raiddisk; /* slot to fill in autolayout */
		enum action action;
	} *disks;
	struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
				      active */
	struct dl *missing; /* disks removed while we weren't looking */
	struct bbm_log *bbm_log;
	struct intel_hba *hba; /* device path of the raid controller for this metadata */
	const struct imsm_orom *orom; /* platform firmware support */
	struct intel_super *next; /* (temp) list for disambiguating family_num */
};

struct intel_disk {
	struct imsm_disk disk;
#define IMSM_UNKNOWN_OWNER (-1)
	int owner;
	struct intel_disk *next;
};

/* a contiguous used region on a physical disk (units: sectors) */
struct extent {
	unsigned long long start, size;
};
332
/* definitions of reshape process types */
enum imsm_reshape_type {
	CH_TAKEOVER,	/* level change handled as a takeover */
	CH_MIGRATION,	/* change performed via metadata migration */
};

/* definition of messages passed to imsm_process_update */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_kill_array,
	update_rename_array,
	update_add_remove_disk,
	update_reshape_container_disks,
	update_reshape_migration,
	update_takeover
};

struct imsm_update_activate_spare {
	enum imsm_update_type type;
	struct dl *dl;
	int slot;
	int array;
	struct imsm_update_activate_spare *next;
};

/* user-requested geometry for a reshape operation */
struct geo_params {
	int dev_id;
	char *dev_name;
	long long size;
	int level;
	int layout;
	int chunksize;
	int raid_disks;
};

enum takeover_direction {
	R10_TO_R0,
	R0_TO_R10
};
struct imsm_update_takeover {
	enum imsm_update_type type;
	int subarray;
	enum takeover_direction direction;
};

struct imsm_update_reshape {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

struct imsm_update_reshape_migration {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;
	/* fields for array migration changes
	 */
	int subdev;
	int new_level;
	int new_layout;
	int new_chunksize;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

struct disk_info {
	__u8 serial[MAX_RAID_SERIAL_LEN];
};

/* variable length: a disk_info[] array follows the embedded imsm_dev */
struct imsm_update_create_array {
	enum imsm_update_type type;
	int dev_idx;
	struct imsm_dev dev;
};

struct imsm_update_kill_array {
	enum imsm_update_type type;
	int dev_idx;
};

struct imsm_update_rename_array {
	enum imsm_update_type type;
	__u8 name[MAX_RAID_SERIAL_LEN];
	int dev_idx;
};

struct imsm_update_add_remove_disk {
	enum imsm_update_type type;
};
425
426
/* printable names for enum sys_dev_type, indexed by the enum value */
static const char *_sys_dev_type[] = {
	[SYS_DEV_UNKNOWN] = "Unknown",
	[SYS_DEV_SAS] = "SAS",
	[SYS_DEV_SATA] = "SATA"
};
432
433 const char *get_sys_dev_type(enum sys_dev_type type)
434 {
435 if (type >= SYS_DEV_MAX)
436 type = SYS_DEV_UNKNOWN;
437
438 return _sys_dev_type[type];
439 }
440
441 static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
442 {
443 struct intel_hba *result = malloc(sizeof(*result));
444 if (result) {
445 result->type = device->type;
446 result->path = strdup(device->path);
447 result->next = NULL;
448 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
449 result->pci_id++;
450 }
451 return result;
452 }
453
454 static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
455 {
456 struct intel_hba *result=NULL;
457 for (result = hba; result; result = result->next) {
458 if (result->type == device->type && strcmp(result->path, device->path) == 0)
459 break;
460 }
461 return result;
462 }
463
/* Record that @device's HBA hosts disks belonging to @super.
 *
 * Returns 1 when the HBA is (now) attached, 2 when @device's HBA type
 * conflicts with the type already attached to @super.
 * NOTE(review): an alloc_intel_hba() failure is not detected here — the
 * function still returns 1 with a NULL entry; confirm callers tolerate this.
 */
static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
{
	struct intel_hba *hba;

	/* check if disk attached to Intel HBA */
	hba = find_intel_hba(super->hba, device);
	if (hba != NULL)
		return 1;
	/* Check if HBA is already attached to super */
	if (super->hba == NULL) {
		super->hba = alloc_intel_hba(device);
		return 1;
	}

	hba = super->hba;
	/* Intel metadata allows for all disks attached to the same type HBA.
	 * Do not support mixing of HBA types.
	 */
	if (device->type != hba->type)
		return 2;

	/* append to the end of the attached-HBA list */
	while (hba->next)
		hba = hba->next;

	hba->next = alloc_intel_hba(device);
	return 1;
}
491
492 static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
493 {
494 struct sys_dev *list, *elem, *prev;
495 char *disk_path;
496
497 if ((list = find_intel_devices()) == NULL)
498 return 0;
499
500 if (fd < 0)
501 disk_path = (char *) devname;
502 else
503 disk_path = diskfd_to_devpath(fd);
504
505 if (!disk_path) {
506 free_sys_dev(&list);
507 return 0;
508 }
509
510 for (prev = NULL, elem = list; elem; prev = elem, elem = elem->next) {
511 if (path_attached_to_hba(disk_path, elem->path)) {
512 if (prev == NULL)
513 list = list->next;
514 else
515 prev->next = elem->next;
516 elem->next = NULL;
517 if (disk_path != devname)
518 free(disk_path);
519 free_sys_dev(&list);
520 return elem;
521 }
522 }
523 if (disk_path != devname)
524 free(disk_path);
525 free_sys_dev(&list);
526
527 return NULL;
528 }
529
530
531 static int find_intel_hba_capability(int fd, struct intel_super *super,
532 char *devname);
533
534 static struct supertype *match_metadata_desc_imsm(char *arg)
535 {
536 struct supertype *st;
537
538 if (strcmp(arg, "imsm") != 0 &&
539 strcmp(arg, "default") != 0
540 )
541 return NULL;
542
543 st = malloc(sizeof(*st));
544 if (!st)
545 return NULL;
546 memset(st, 0, sizeof(*st));
547 st->container_dev = NoMdDev;
548 st->ss = &super_imsm;
549 st->max_devs = IMSM_MAX_DEVICES;
550 st->minor_version = 0;
551 st->sb = NULL;
552 return st;
553 }
554
#ifndef MDASSEMBLE
/* the version string (e.g. "1.3.00") is stored in mpb->sig immediately
 * after the fixed signature prefix
 */
static __u8 *get_imsm_version(struct imsm_super *mpb)
{
	return &mpb->sig[MPB_SIG_LEN];
}
#endif
561
562 /* retrieve a disk directly from the anchor when the anchor is known to be
563 * up-to-date, currently only at load time
564 */
565 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
566 {
567 if (index >= mpb->num_disks)
568 return NULL;
569 return &mpb->disk[index];
570 }
571
572 /* retrieve the disk description based on a index of the disk
573 * in the sub-array
574 */
575 static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
576 {
577 struct dl *d;
578
579 for (d = super->disks; d; d = d->next)
580 if (d->index == index)
581 return d;
582
583 return NULL;
584 }
585 /* retrieve a disk from the parsed metadata */
586 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
587 {
588 struct dl *dl;
589
590 dl = get_imsm_dl_disk(super, index);
591 if (dl)
592 return &dl->disk;
593
594 return NULL;
595 }
596
/* generate a checksum directly from the anchor when the anchor is known to be
 * up-to-date, currently only at load or write_super after coalescing
 */
static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
{
	/* NOTE(review): mpb_size is read without __le32_to_cpu here —
	 * presumably relies on a little-endian host; confirm
	 */
	__u32 end = mpb->mpb_size / sizeof(end);
	__u32 *p = (__u32 *) mpb;
	__u32 sum = 0;

	/* sum every 32-bit word of the MPB ... */
	while (end--) {
		sum += __le32_to_cpu(*p);
		p++;
	}

	/* ... then back out the stored checksum so the result can be
	 * compared directly against mpb->check_sum
	 */
	return sum - __le32_to_cpu(mpb->check_sum);
}
613
614 static size_t sizeof_imsm_map(struct imsm_map *map)
615 {
616 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
617 }
618
619 struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
620 {
621 /* A device can have 2 maps if it is in the middle of a migration.
622 * If second_map is:
623 * 0 - we return the first map
624 * 1 - we return the second map if it exists, else NULL
625 * -1 - we return the second map if it exists, else the first
626 */
627 struct imsm_map *map = &dev->vol.map[0];
628
629 if (second_map == 1 && !dev->vol.migr_state)
630 return NULL;
631 else if (second_map == 1 ||
632 (second_map < 0 && dev->vol.migr_state)) {
633 void *ptr = map;
634
635 return ptr + sizeof_imsm_map(map);
636 } else
637 return map;
638
639 }
640
/* return the size of the device.
 * migr_state increases the returned size if map[0] were to be duplicated
 */
static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
{
	/* base struct minus the embedded one-element map, plus the real
	 * (variable length) first map
	 */
	size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
		      sizeof_imsm_map(get_imsm_map(dev, 0));

	/* migrating means an additional map */
	if (dev->vol.migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, 1));
	else if (migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, 0));

	return size;
}
657
#ifndef MDASSEMBLE
/* retrieve disk serial number list from a metadata update */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
	void *u = update;
	struct disk_info *inf;

	/* the disk_info array sits immediately after the variable-length
	 * imsm_dev embedded at the end of the update message
	 */
	inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
	      sizeof_imsm_dev(&update->dev, 0);

	return inf;
}
#endif
671
672 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
673 {
674 int offset;
675 int i;
676 void *_mpb = mpb;
677
678 if (index >= mpb->num_raid_devs)
679 return NULL;
680
681 /* devices start after all disks */
682 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
683
684 for (i = 0; i <= index; i++)
685 if (i == index)
686 return _mpb + offset;
687 else
688 offset += sizeof_imsm_dev(_mpb + offset, 0);
689
690 return NULL;
691 }
692
693 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
694 {
695 struct intel_dev *dv;
696
697 if (index >= super->anchor->num_raid_devs)
698 return NULL;
699 for (dv = super->devlist; dv; dv = dv->next)
700 if (dv->index == index)
701 return dv->dev;
702 return NULL;
703 }
704
/*
 * Return the raw ordinal-table entry for @slot (host byte order).
 * for second_map:
 *  == 0 get first map
 *  == 1 get second map
 *  == -1 then get map according to the current migr_state
 */
static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
				  int slot,
				  int second_map)
{
	struct imsm_map *map;

	map = get_imsm_map(dev, second_map);

	/* top byte identifies disk under rebuild */
	return __le32_to_cpu(map->disk_ord_tbl[slot]);
}
722
/* strip the flag bits (e.g. IMSM_ORD_REBUILD) from an ordinal, leaving
 * the disk index in the low 24 bits
 */
#define ord_to_idx(ord) (((ord) << 8) >> 8)
static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
{
	__u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);

	return ord_to_idx(ord);
}
730
/* store ordinal @ord (disk index plus flag bits) at @slot, little-endian */
static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
	map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}
735
736 static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
737 {
738 int slot;
739 __u32 ord;
740
741 for (slot = 0; slot < map->num_members; slot++) {
742 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
743 if (ord_to_idx(ord) == idx)
744 return slot;
745 }
746
747 return -1;
748 }
749
750 static int get_imsm_raid_level(struct imsm_map *map)
751 {
752 if (map->raid_level == 1) {
753 if (map->num_members == 2)
754 return 1;
755 else
756 return 10;
757 }
758
759 return map->raid_level;
760 }
761
762 static int cmp_extent(const void *av, const void *bv)
763 {
764 const struct extent *a = av;
765 const struct extent *b = bv;
766 if (a->start < b->start)
767 return -1;
768 if (a->start > b->start)
769 return 1;
770 return 0;
771 }
772
773 static int count_memberships(struct dl *dl, struct intel_super *super)
774 {
775 int memberships = 0;
776 int i;
777
778 for (i = 0; i < super->anchor->num_raid_devs; i++) {
779 struct imsm_dev *dev = get_imsm_dev(super, i);
780 struct imsm_map *map = get_imsm_map(dev, 0);
781
782 if (get_imsm_disk_slot(map, dl->index) >= 0)
783 memberships++;
784 }
785
786 return memberships;
787 }
788
/* Build a sorted list of used extents on physical disk @dl, terminated
 * by a zero-size sentinel that marks the start of the metadata
 * reservation at the end of the disk.  Caller frees the result.
 * Returns NULL on allocation failure.
 */
static struct extent *get_extents(struct intel_super *super, struct dl *dl)
{
	/* find a list of used extents on the given physical device */
	struct extent *rv, *e;
	int i;
	int memberships = count_memberships(dl, super);
	__u32 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* one entry per membership plus the terminating sentinel */
	rv = malloc(sizeof(struct extent) * (memberships + 1));
	if (!rv)
		return NULL;
	e = rv;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev = get_imsm_dev(super, i);
		struct imsm_map *map = get_imsm_map(dev, 0);

		if (get_imsm_disk_slot(map, dl->index) >= 0) {
			e->start = __le32_to_cpu(map->pba_of_lba0);
			e->size = __le32_to_cpu(map->blocks_per_member);
			e++;
		}
	}
	qsort(rv, memberships, sizeof(*rv), cmp_extent);

	/* determine the start of the metadata
	 * when no raid devices are defined use the default
	 * ...otherwise allow the metadata to truncate the value
	 * as is the case with older versions of imsm
	 */
	if (memberships) {
		struct extent *last = &rv[memberships - 1];
		__u32 remainder;

		remainder = __le32_to_cpu(dl->disk.total_blocks) -
			    (last->start + last->size);
		/* round down to 1k block to satisfy precision of the kernel
		 * 'size' interface
		 */
		remainder &= ~1UL;
		/* make sure remainder is still sane */
		if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
			remainder = ROUND_UP(super->len, 512) >> 9;
		if (reservation > remainder)
			reservation = remainder;
	}
	/* sentinel: metadata reservation occupies the tail of the disk */
	e->start = __le32_to_cpu(dl->disk.total_blocks) - reservation;
	e->size = 0;
	return rv;
}
839
/* try to determine how much space is reserved for metadata from
 * the last get_extents() entry, otherwise fallback to the
 * default
 */
static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
{
	struct extent *e;
	int i;
	__u32 rv;

	/* for spares just return a minimal reservation which will grow
	 * once the spare is picked up by an array
	 */
	if (dl->index == -1)
		return MPB_SECTOR_CNT;

	e = get_extents(super, dl);
	if (!e)
		return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

	/* scroll to last entry (the zero-size sentinel) */
	for (i = 0; e[i].size; i++)
		continue;

	/* everything past the sentinel's start is the reservation */
	rv = __le32_to_cpu(dl->disk.total_blocks) - e[i].start;

	free(e);

	return rv;
}
870
871 static int is_spare(struct imsm_disk *disk)
872 {
873 return (disk->status & SPARE_DISK) == SPARE_DISK;
874 }
875
876 static int is_configured(struct imsm_disk *disk)
877 {
878 return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
879 }
880
881 static int is_failed(struct imsm_disk *disk)
882 {
883 return (disk->status & FAILED_DISK) == FAILED_DISK;
884 }
885
/* Return minimum size of a spare that can be used in this array (bytes) */
static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
{
	struct intel_super *super = st->sb;
	struct dl *dl;
	struct extent *e;
	int i;
	unsigned long long rv = 0;

	if (!super)
		return rv;
	/* find first active disk in array */
	dl = super->disks;
	while (dl && (is_failed(&dl->disk) || dl->index == -1))
		dl = dl->next;
	if (!dl)
		return rv;
	/* find last lba used by subarrays */
	e = get_extents(super, dl);
	if (!e)
		return rv;
	/* scroll to the zero-size sentinel terminating the extent list */
	for (i = 0; e[i].size; i++)
		continue;
	if (i > 0)
		rv = e[i-1].start + e[i-1].size;
	free(e);
	/* add the amount of space needed for metadata */
	rv = rv + MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
	/* convert sectors to bytes */
	return rv * 512;
}
916
917 #ifndef MDASSEMBLE
918 static __u64 blocks_per_migr_unit(struct intel_super *super,
919 struct imsm_dev *dev);
920
/* Print a human-readable summary of volume @dev for --examine, including
 * both maps when a migration is in progress and the slot occupied by the
 * disk @disk_idx of the examined device.
 */
static void print_imsm_dev(struct intel_super *super,
			   struct imsm_dev *dev,
			   char *uuid,
			   int disk_idx)
{
	__u64 sz;
	int slot, i;
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *map2 = get_imsm_map(dev, 1);	/* NULL unless migrating */
	__u32 ord;

	printf("\n");
	printf("[%.16s]:\n", dev->volume);
	printf(" UUID : %s\n", uuid);
	printf(" RAID Level : %d", get_imsm_raid_level(map));
	if (map2)
		printf(" <-- %d", get_imsm_raid_level(map2));
	printf("\n");
	printf(" Members : %d", map->num_members);
	if (map2)
		printf(" <-- %d", map2->num_members);
	printf("\n");
	/* per-slot in-sync status: '_' marks a slot under rebuild */
	printf(" Slots : [");
	for (i = 0; i < map->num_members; i++) {
		ord = get_imsm_ord_tbl_ent(dev, i, 0);
		printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
	}
	printf("]");
	if (map2) {
		printf(" <-- [");
		for (i = 0; i < map2->num_members; i++) {
			ord = get_imsm_ord_tbl_ent(dev, i, 1);
			printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
		}
		printf("]");
	}
	printf("\n");
	printf(" Failed disk : ");
	if (map->failed_disk_num == 0xff)
		printf("none");
	else
		printf("%i", map->failed_disk_num);
	printf("\n");
	slot = get_imsm_disk_slot(map, disk_idx);
	if (slot >= 0) {
		ord = get_imsm_ord_tbl_ent(dev, slot, -1);
		printf(" This Slot : %d%s\n", slot,
		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
	} else
		printf(" This Slot : ?\n");
	/* 64-bit array size is split across size_high/size_low */
	sz = __le32_to_cpu(dev->size_high);
	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	printf(" Array Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	sz = __le32_to_cpu(map->blocks_per_member);
	printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	printf(" Sector Offset : %u\n",
	       __le32_to_cpu(map->pba_of_lba0));
	printf(" Num Stripes : %u\n",
	       __le32_to_cpu(map->num_data_stripes));
	printf(" Chunk Size : %u KiB",
	       __le16_to_cpu(map->blocks_per_strip) / 2);
	if (map2)
		printf(" <-- %u KiB",
		       __le16_to_cpu(map2->blocks_per_strip) / 2);
	printf("\n");
	printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
	printf(" Migrate State : ");
	if (dev->vol.migr_state) {
		if (migr_type(dev) == MIGR_INIT)
			printf("initialize\n");
		else if (migr_type(dev) == MIGR_REBUILD)
			printf("rebuild\n");
		else if (migr_type(dev) == MIGR_VERIFY)
			printf("check\n");
		else if (migr_type(dev) == MIGR_GEN_MIGR)
			printf("general migration\n");
		else if (migr_type(dev) == MIGR_STATE_CHANGE)
			printf("state change\n");
		else if (migr_type(dev) == MIGR_REPAIR)
			printf("repair\n");
		else
			printf("<unknown:%d>\n", migr_type(dev));
	} else
		printf("idle\n");
	printf(" Map State : %s", map_state_str[map->map_state]);
	if (dev->vol.migr_state) {
		struct imsm_map *map = get_imsm_map(dev, 1);

		printf(" <-- %s", map_state_str[map->map_state]);
		printf("\n Checkpoint : %u (%llu)",
		       __le32_to_cpu(dev->vol.curr_migr_unit),
		       (unsigned long long)blocks_per_migr_unit(super, dev));
	}
	printf("\n");
	printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
}
1020
1021 static void print_imsm_disk(struct imsm_super *mpb, int index, __u32 reserved)
1022 {
1023 struct imsm_disk *disk = __get_imsm_disk(mpb, index);
1024 char str[MAX_RAID_SERIAL_LEN + 1];
1025 __u64 sz;
1026
1027 if (index < 0 || !disk)
1028 return;
1029
1030 printf("\n");
1031 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1032 printf(" Disk%02d Serial : %s\n", index, str);
1033 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1034 is_configured(disk) ? " active" : "",
1035 is_failed(disk) ? " failed" : "");
1036 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1037 sz = __le32_to_cpu(disk->total_blocks) - reserved;
1038 printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
1039 human_size(sz * 512));
1040 }
1041
1042 static int is_gen_migration(struct imsm_dev *dev);
1043
/* Print the migration (checkpoint) record for --examine when any volume
 * is undergoing a general migration.  The record is only valid when read
 * from one of the first two disks, so other disks report "Empty".
 */
void examine_migr_rec_imsm(struct intel_super *super)
{
	struct migr_record *migr_rec = super->migr_rec;
	struct imsm_super *mpb = super->anchor;
	int i;

	/* find the first volume in general migration; only one record exists */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
		if (is_gen_migration(dev) == 0)
			continue;

		printf("\nMigration Record Information:");
		if (super->disks->index > 1) {
			printf(" Empty\n ");
			printf("Examine one of first two disks in array\n");
			break;
		}
		printf("\n Status : ");
		if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
			printf("Normal\n");
		else
			printf("Contains Data\n");
		printf(" Current Unit : %u\n",
		       __le32_to_cpu(migr_rec->curr_migr_unit));
		printf(" Family : %u\n",
		       __le32_to_cpu(migr_rec->family_num));
		printf(" Ascending : %u\n",
		       __le32_to_cpu(migr_rec->ascending_migr));
		printf(" Blocks Per Unit : %u\n",
		       __le32_to_cpu(migr_rec->blocks_per_unit));
		printf(" Dest. Depth Per Unit : %u\n",
		       __le32_to_cpu(migr_rec->dest_depth_per_unit));
		printf(" Checkpoint Area pba : %u\n",
		       __le32_to_cpu(migr_rec->ckpt_area_pba));
		printf(" First member lba : %u\n",
		       __le32_to_cpu(migr_rec->dest_1st_member_lba));
		printf(" Total Number of Units : %u\n",
		       __le32_to_cpu(migr_rec->num_migr_units));
		printf(" Size of volume : %u\n",
		       __le32_to_cpu(migr_rec->post_migr_vol_cap));
		printf(" Expansion space for LBA64 : %u\n",
		       __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
		printf(" Record was read from : %u\n",
		       __le32_to_cpu(migr_rec->ckpt_read_disk_num));

		break;
	}
}
1092
1093 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
1094
/* Full --examine dump of the imsm container: anchor fields, the local
 * disk, the bbm log (if any), every raid volume, the other disks in the
 * mpb table, any unindexed (spare/orphan) disks, and finally the
 * migration record.  'homehost' is unused: imsm metadata carries no
 * host identity.
 */
static void examine_super_imsm(struct supertype *st, char *homehost)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	char str[MAX_SIGNATURE_LENGTH];
	int i;
	struct mdinfo info;
	char nbuf[64];
	__u32 sum;
	/* sectors at the end of each disk reserved for metadata */
	__u32 reserved = imsm_reserved_sectors(super, super->disks);
	struct dl *dl;

	snprintf(str, MPB_SIG_LEN, "%s", mpb->sig);
	printf(" Magic : %s\n", str);
	/* NOTE(review): this snprintf truncates to strlen("1.0.00")-1
	 * bytes and 'str' is never used afterwards -- the version is
	 * printed directly from get_imsm_version() below
	 */
	snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
	printf(" Version : %s\n", get_imsm_version(mpb));
	printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
	printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
	printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
	getinfo_super_imsm(st, &info, NULL);
	fname_from_uuid(st, &info, nbuf, ':');
	/* nbuf + 5 skips the "UUID=" prefix fname_from_uuid() produces */
	printf(" UUID : %s\n", nbuf + 5);
	sum = __le32_to_cpu(mpb->check_sum);
	printf(" Checksum : %08x %s\n", sum,
	       __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
	printf(" MPB Sectors : %d\n", mpb_sectors(mpb));
	printf(" Disks : %d\n", mpb->num_disks);
	printf(" RAID Devices : %d\n", mpb->num_raid_devs);
	/* show the disk this metadata was read from first */
	print_imsm_disk(mpb, super->disks->index, reserved);
	if (super->bbm_log) {
		struct bbm_log *log = super->bbm_log;

		printf("\n");
		printf("Bad Block Management Log:\n");
		printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
		printf(" Signature : %x\n", __le32_to_cpu(log->signature));
		printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
		printf(" Spare Blocks : %d\n", __le32_to_cpu(log->reserved_spare_block_count));
		printf(" First Spare : %llx\n",
		       (unsigned long long) __le64_to_cpu(log->first_spare_lba));
	}
	for (i = 0; i < mpb->num_raid_devs; i++) {
		/* NOTE(review): shadows the outer 'info' */
		struct mdinfo info;
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);

		/* getinfo_super_imsm() reports on the selected volume */
		super->current_vol = i;
		getinfo_super_imsm(st, &info, NULL);
		fname_from_uuid(st, &info, nbuf, ':');
		print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
	}
	/* remaining disks of the mpb table (the local one was shown above) */
	for (i = 0; i < mpb->num_disks; i++) {
		if (i == super->disks->index)
			continue;
		print_imsm_disk(mpb, i, reserved);
	}
	/* disks attached to the container but absent from the mpb table
	 * (index < 0), e.g. unclaimed spares
	 */
	for (dl = super->disks ; dl; dl = dl->next) {
		struct imsm_disk *disk;
		char str[MAX_RAID_SERIAL_LEN + 1];
		__u64 sz;

		if (dl->index >= 0)
			continue;

		disk = &dl->disk;
		printf("\n");
		snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
		printf(" Disk Serial : %s\n", str);
		printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
		       is_configured(disk) ? " active" : "",
		       is_failed(disk) ? " failed" : "");
		printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
		sz = __le32_to_cpu(disk->total_blocks) - reserved;
		printf(" Usable Size : %llu%s\n", (unsigned long long)sz,
		       human_size(sz * 512));
	}

	examine_migr_rec_imsm(super);
}
1173
1174 static void brief_examine_super_imsm(struct supertype *st, int verbose)
1175 {
1176 /* We just write a generic IMSM ARRAY entry */
1177 struct mdinfo info;
1178 char nbuf[64];
1179 struct intel_super *super = st->sb;
1180
1181 if (!super->anchor->num_raid_devs) {
1182 printf("ARRAY metadata=imsm\n");
1183 return;
1184 }
1185
1186 getinfo_super_imsm(st, &info, NULL);
1187 fname_from_uuid(st, &info, nbuf, ':');
1188 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1189 }
1190
1191 static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1192 {
1193 /* We just write a generic IMSM ARRAY entry */
1194 struct mdinfo info;
1195 char nbuf[64];
1196 char nbuf1[64];
1197 struct intel_super *super = st->sb;
1198 int i;
1199
1200 if (!super->anchor->num_raid_devs)
1201 return;
1202
1203 getinfo_super_imsm(st, &info, NULL);
1204 fname_from_uuid(st, &info, nbuf, ':');
1205 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1206 struct imsm_dev *dev = get_imsm_dev(super, i);
1207
1208 super->current_vol = i;
1209 getinfo_super_imsm(st, &info, NULL);
1210 fname_from_uuid(st, &info, nbuf1, ':');
1211 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1212 dev->volume, nbuf + 5, i, nbuf1 + 5);
1213 }
1214 }
1215
1216 static void export_examine_super_imsm(struct supertype *st)
1217 {
1218 struct intel_super *super = st->sb;
1219 struct imsm_super *mpb = super->anchor;
1220 struct mdinfo info;
1221 char nbuf[64];
1222
1223 getinfo_super_imsm(st, &info, NULL);
1224 fname_from_uuid(st, &info, nbuf, ':');
1225 printf("MD_METADATA=imsm\n");
1226 printf("MD_LEVEL=container\n");
1227 printf("MD_UUID=%s\n", nbuf+5);
1228 printf("MD_DEVICES=%u\n", mpb->num_disks);
1229 }
1230
1231 static void detail_super_imsm(struct supertype *st, char *homehost)
1232 {
1233 struct mdinfo info;
1234 char nbuf[64];
1235
1236 getinfo_super_imsm(st, &info, NULL);
1237 fname_from_uuid(st, &info, nbuf, ':');
1238 printf("\n UUID : %s\n", nbuf + 5);
1239 }
1240
1241 static void brief_detail_super_imsm(struct supertype *st)
1242 {
1243 struct mdinfo info;
1244 char nbuf[64];
1245 getinfo_super_imsm(st, &info, NULL);
1246 fname_from_uuid(st, &info, nbuf, ':');
1247 printf(" UUID=%s", nbuf + 5);
1248 }
1249
1250 static int imsm_read_serial(int fd, char *devname, __u8 *serial);
1251 static void fd2devname(int fd, char *name);
1252
1253 static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
1254 {
1255 /* dump an unsorted list of devices attached to AHCI Intel storage
1256 * controller, as well as non-connected ports
1257 */
1258 int hba_len = strlen(hba_path) + 1;
1259 struct dirent *ent;
1260 DIR *dir;
1261 char *path = NULL;
1262 int err = 0;
1263 unsigned long port_mask = (1 << port_count) - 1;
1264
1265 if (port_count > (int)sizeof(port_mask) * 8) {
1266 if (verbose)
1267 fprintf(stderr, Name ": port_count %d out of range\n", port_count);
1268 return 2;
1269 }
1270
1271 /* scroll through /sys/dev/block looking for devices attached to
1272 * this hba
1273 */
1274 dir = opendir("/sys/dev/block");
1275 for (ent = dir ? readdir(dir) : NULL; ent; ent = readdir(dir)) {
1276 int fd;
1277 char model[64];
1278 char vendor[64];
1279 char buf[1024];
1280 int major, minor;
1281 char *device;
1282 char *c;
1283 int port;
1284 int type;
1285
1286 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
1287 continue;
1288 path = devt_to_devpath(makedev(major, minor));
1289 if (!path)
1290 continue;
1291 if (!path_attached_to_hba(path, hba_path)) {
1292 free(path);
1293 path = NULL;
1294 continue;
1295 }
1296
1297 /* retrieve the scsi device type */
1298 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
1299 if (verbose)
1300 fprintf(stderr, Name ": failed to allocate 'device'\n");
1301 err = 2;
1302 break;
1303 }
1304 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
1305 if (load_sys(device, buf) != 0) {
1306 if (verbose)
1307 fprintf(stderr, Name ": failed to read device type for %s\n",
1308 path);
1309 err = 2;
1310 free(device);
1311 break;
1312 }
1313 type = strtoul(buf, NULL, 10);
1314
1315 /* if it's not a disk print the vendor and model */
1316 if (!(type == 0 || type == 7 || type == 14)) {
1317 vendor[0] = '\0';
1318 model[0] = '\0';
1319 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
1320 if (load_sys(device, buf) == 0) {
1321 strncpy(vendor, buf, sizeof(vendor));
1322 vendor[sizeof(vendor) - 1] = '\0';
1323 c = (char *) &vendor[sizeof(vendor) - 1];
1324 while (isspace(*c) || *c == '\0')
1325 *c-- = '\0';
1326
1327 }
1328 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
1329 if (load_sys(device, buf) == 0) {
1330 strncpy(model, buf, sizeof(model));
1331 model[sizeof(model) - 1] = '\0';
1332 c = (char *) &model[sizeof(model) - 1];
1333 while (isspace(*c) || *c == '\0')
1334 *c-- = '\0';
1335 }
1336
1337 if (vendor[0] && model[0])
1338 sprintf(buf, "%.64s %.64s", vendor, model);
1339 else
1340 switch (type) { /* numbers from hald/linux/device.c */
1341 case 1: sprintf(buf, "tape"); break;
1342 case 2: sprintf(buf, "printer"); break;
1343 case 3: sprintf(buf, "processor"); break;
1344 case 4:
1345 case 5: sprintf(buf, "cdrom"); break;
1346 case 6: sprintf(buf, "scanner"); break;
1347 case 8: sprintf(buf, "media_changer"); break;
1348 case 9: sprintf(buf, "comm"); break;
1349 case 12: sprintf(buf, "raid"); break;
1350 default: sprintf(buf, "unknown");
1351 }
1352 } else
1353 buf[0] = '\0';
1354 free(device);
1355
1356 /* chop device path to 'host%d' and calculate the port number */
1357 c = strchr(&path[hba_len], '/');
1358 if (!c) {
1359 if (verbose)
1360 fprintf(stderr, Name ": %s - invalid path name\n", path + hba_len);
1361 err = 2;
1362 break;
1363 }
1364 *c = '\0';
1365 if (sscanf(&path[hba_len], "host%d", &port) == 1)
1366 port -= host_base;
1367 else {
1368 if (verbose) {
1369 *c = '/'; /* repair the full string */
1370 fprintf(stderr, Name ": failed to determine port number for %s\n",
1371 path);
1372 }
1373 err = 2;
1374 break;
1375 }
1376
1377 /* mark this port as used */
1378 port_mask &= ~(1 << port);
1379
1380 /* print out the device information */
1381 if (buf[0]) {
1382 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
1383 continue;
1384 }
1385
1386 fd = dev_open(ent->d_name, O_RDONLY);
1387 if (fd < 0)
1388 printf(" Port%d : - disk info unavailable -\n", port);
1389 else {
1390 fd2devname(fd, buf);
1391 printf(" Port%d : %s", port, buf);
1392 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
1393 printf(" (%s)\n", buf);
1394 else
1395 printf("()\n");
1396 }
1397 close(fd);
1398 free(path);
1399 path = NULL;
1400 }
1401 if (path)
1402 free(path);
1403 if (dir)
1404 closedir(dir);
1405 if (err == 0) {
1406 int i;
1407
1408 for (i = 0; i < port_count; i++)
1409 if (port_mask & (1 << i))
1410 printf(" Port%d : - no device attached -\n", i);
1411 }
1412
1413 return err;
1414 }
1415
1416
1417
1418 static void print_found_intel_controllers(struct sys_dev *elem)
1419 {
1420 for (; elem; elem = elem->next) {
1421 fprintf(stderr, Name ": found Intel(R) ");
1422 if (elem->type == SYS_DEV_SATA)
1423 fprintf(stderr, "SATA ");
1424 else if (elem->type == SYS_DEV_SAS)
1425 fprintf(stderr, "SAS ");
1426 fprintf(stderr, "RAID controller");
1427 if (elem->pci_id)
1428 fprintf(stderr, " at %s", elem->pci_id);
1429 fprintf(stderr, ".\n");
1430 }
1431 fflush(stderr);
1432 }
1433
/* Count the scsi hosts under 'hba_path' and derive the port count from
 * the highest host number seen, normalized to the lowest ("host base").
 * Returns that base host number, or -1 if the path cannot be opened;
 * *port_count receives the number of ports.
 */
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
	DIR *dir;
	struct dirent *ent;
	int host_base = -1;

	*port_count = 0;
	dir = opendir(hba_path);
	if (dir == NULL)
		return -1;

	while ((ent = readdir(dir)) != NULL) {
		int host;

		if (sscanf(ent->d_name, "host%d", &host) != 1)
			continue;
		/* track the smallest host number as the base */
		if (*port_count == 0 || host < host_base)
			host_base = host;
		if (host + 1 > *port_count + host_base)
			*port_count = host + 1 - host_base;
	}
	closedir(dir);
	return host_base;
}
1460
1461 static void print_imsm_capability(const struct imsm_orom *orom)
1462 {
1463 printf(" Platform : Intel(R) Matrix Storage Manager\n");
1464 printf(" Version : %d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
1465 orom->hotfix_ver, orom->build);
1466 printf(" RAID Levels :%s%s%s%s%s\n",
1467 imsm_orom_has_raid0(orom) ? " raid0" : "",
1468 imsm_orom_has_raid1(orom) ? " raid1" : "",
1469 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
1470 imsm_orom_has_raid10(orom) ? " raid10" : "",
1471 imsm_orom_has_raid5(orom) ? " raid5" : "");
1472 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1473 imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
1474 imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
1475 imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
1476 imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
1477 imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
1478 imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
1479 imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
1480 imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
1481 imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
1482 imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
1483 imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
1484 imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
1485 imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
1486 imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
1487 imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
1488 imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
1489 printf(" Max Disks : %d\n", orom->tds);
1490 printf(" Max Volumes : %d\n", orom->vpa);
1491 return;
1492 }
1493
/* Report imsm platform support for --detail-platform.  Returns 0 on
 * success, 2 on failure (or a bitwise-OR of per-controller failures).
 * With 'enumerate_only' set, silently checks that every Intel
 * controller has option-rom capabilities and returns without printing.
 */
static int detail_platform_imsm(int verbose, int enumerate_only)
{
	/* There are two components to imsm platform support, the ahci SATA
	 * controller and the option-rom. To find the SATA controller we
	 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
	 * controller with the Intel vendor id is present. This approach
	 * allows mdadm to leverage the kernel's ahci detection logic, with the
	 * caveat that if ahci.ko is not loaded mdadm will not be able to
	 * detect platform raid capabilities. The option-rom resides in a
	 * platform "Adapter ROM". We scan for its signature to retrieve the
	 * platform capabilities. If raid support is disabled in the BIOS the
	 * option-rom capability structure will not be available.
	 */
	const struct imsm_orom *orom;
	struct sys_dev *list, *hba;
	int host_base = 0;
	int port_count = 0;
	int result=0;

	if (enumerate_only) {
		/* IMSM_NO_PLATFORM overrides the hardware check entirely */
		if (check_env("IMSM_NO_PLATFORM"))
			return 0;
		list = find_intel_devices();
		if (!list)
			return 2;
		/* fail if any controller lacks option-rom capabilities */
		for (hba = list; hba; hba = hba->next) {
			orom = find_imsm_capability(hba->type);
			if (!orom) {
				result = 2;
				break;
			}
		}
		free_sys_dev(&list);
		return result;
	}

	list = find_intel_devices();
	if (!list) {
		if (verbose)
			fprintf(stderr, Name ": no active Intel(R) RAID "
				"controller found.\n");
		free_sys_dev(&list);
		return 2;
	} else if (verbose)
		print_found_intel_controllers(list);

	/* first pass: print each controller's option-rom capabilities */
	for (hba = list; hba; hba = hba->next) {
		orom = find_imsm_capability(hba->type);
		if (!orom)
			fprintf(stderr, Name ": imsm capabilities not found for controller: %s (type %s)\n",
				hba->path, get_sys_dev_type(hba->type));
		else
			print_imsm_capability(orom);
	}

	/* second pass: enumerate the ports of each SATA controller */
	for (hba = list; hba; hba = hba->next) {
		printf(" I/O Controller : %s (%s)\n",
		       hba->path, get_sys_dev_type(hba->type));

		if (hba->type == SYS_DEV_SATA) {
			host_base = ahci_get_port_count(hba->path, &port_count);
			if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
				if (verbose)
					fprintf(stderr, Name ": failed to enumerate "
						"ports on SATA controller at %s.", hba->pci_id);
				result |= 2;
			}
		}
	}

	free_sys_dev(&list);
	return result;
}
#endif
1567 #endif
1568
/* imsm metadata carries no host identification, so we can neither
 * confirm nor deny that an array is "meant" for this host -- always
 * return -1 (unknown).  Membership is instead policed by
 * compare_super() and the 'family_num' fields, plus the arrays listed
 * in mdadm.conf; auto-assembly may still pick up "foreign" arrays.
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	return -1;
}
1582
static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
{
	/* The uuid returned here is used for:
	 *  uuid to put into bitmap file (Create, Grow)
	 *  uuid for backup header when saving critical section (Grow)
	 *  comparing uuids when re-adding a device into an array
	 *    In these cases the uuid required is that of the data-array,
	 *    not the device-set.
	 *  uuid to recognise same set when adding a missing device back
	 *    to an array.   This is a uuid for the device-set.
	 *
	 * For each of these we can make do with a truncated
	 * or hashed uuid rather than the original, as long as
	 * everyone agrees.
	 * In each case the uuid required is that of the data-array,
	 * not the device-set.
	 */
	/* imsm does not track uuid's so we synthesize one using sha1 on
	 * - The signature (Which is constant for all imsm array, but no matter)
	 * - the orig_family_num of the container
	 * - the index number of the volume
	 * - the 'serial' number of the volume.
	 * Hopefully these are all constant.
	 */
	struct intel_super *super = st->sb;

	char buf[20];
	struct sha1_ctx ctx;
	struct imsm_dev *dev = NULL;
	__u32 family_num;

	/* some mdadm versions failed to set ->orig_family_num, in which
	 * case fall back to ->family_num.  orig_family_num will be
	 * fixed up with the first metadata update.
	 */
	family_num = super->anchor->orig_family_num;
	if (family_num == 0)
		family_num = super->anchor->family_num;
	sha1_init_ctx(&ctx);
	sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
	/* NOTE(review): family_num is hashed as its raw in-memory bytes;
	 * the inputs and their order below must never change or every
	 * existing array's uuid would change with them
	 */
	sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
	/* current_vol < 0 means the container itself: hash only the
	 * signature and family number in that case
	 */
	if (super->current_vol >= 0)
		dev = get_imsm_dev(super, super->current_vol);
	if (dev) {
		__u32 vol = super->current_vol;
		sha1_process_bytes(&vol, sizeof(vol), &ctx);
		sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
	}
	/* first 16 of the 20 sha1 digest bytes become the uuid */
	sha1_finish_ctx(&ctx, buf);
	memcpy(uuid, buf, 4*4);
}
1634
#if 0
/* Disabled: parse the mpb version string ("x.y.zz") into numeric
 * components.  NOTE(review): 'major' is collected by the parse loop but
 * never returned -- *m is set from the minor field and *p from the
 * patch field.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	/* split on '.' into up to three 2-character fields */
	i = j = 0;
	while (*v != '\0' && v < end) {
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	*m = strtol(minor, NULL, 0);
	*p = strtol(patch, NULL, 0);
}
#endif
1662
1663 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
1664 {
1665 /* migr_strip_size when repairing or initializing parity */
1666 struct imsm_map *map = get_imsm_map(dev, 0);
1667 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1668
1669 switch (get_imsm_raid_level(map)) {
1670 case 5:
1671 case 10:
1672 return chunk;
1673 default:
1674 return 128*1024 >> 9;
1675 }
1676 }
1677
1678 static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
1679 {
1680 /* migr_strip_size when rebuilding a degraded disk, no idea why
1681 * this is different than migr_strip_size_resync(), but it's good
1682 * to be compatible
1683 */
1684 struct imsm_map *map = get_imsm_map(dev, 1);
1685 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1686
1687 switch (get_imsm_raid_level(map)) {
1688 case 1:
1689 case 10:
1690 if (map->num_members % map->num_domains == 0)
1691 return 128*1024 >> 9;
1692 else
1693 return chunk;
1694 case 5:
1695 return max((__u32) 64*1024 >> 9, chunk);
1696 default:
1697 return 128*1024 >> 9;
1698 }
1699 }
1700
1701 static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
1702 {
1703 struct imsm_map *lo = get_imsm_map(dev, 0);
1704 struct imsm_map *hi = get_imsm_map(dev, 1);
1705 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
1706 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
1707
1708 return max((__u32) 1, hi_chunk / lo_chunk);
1709 }
1710
1711 static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
1712 {
1713 struct imsm_map *lo = get_imsm_map(dev, 0);
1714 int level = get_imsm_raid_level(lo);
1715
1716 if (level == 1 || level == 10) {
1717 struct imsm_map *hi = get_imsm_map(dev, 1);
1718
1719 return hi->num_domains;
1720 } else
1721 return num_stripes_per_unit_resync(dev);
1722 }
1723
1724 static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
1725 {
1726 /* named 'imsm_' because raid0, raid1 and raid10
1727 * counter-intuitively have the same number of data disks
1728 */
1729 struct imsm_map *map = get_imsm_map(dev, second_map);
1730
1731 switch (get_imsm_raid_level(map)) {
1732 case 0:
1733 case 1:
1734 case 10:
1735 return map->num_members;
1736 case 5:
1737 return map->num_members - 1;
1738 default:
1739 dprintf("%s: unsupported raid level\n", __func__);
1740 return 0;
1741 }
1742 }
1743
1744 static __u32 parity_segment_depth(struct imsm_dev *dev)
1745 {
1746 struct imsm_map *map = get_imsm_map(dev, 0);
1747 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
1748
1749 switch(get_imsm_raid_level(map)) {
1750 case 1:
1751 case 10:
1752 return chunk * map->num_domains;
1753 case 5:
1754 return chunk * map->num_members;
1755 default:
1756 return chunk;
1757 }
1758 }
1759
/* Translate a per-volume block offset 'block' into a per-member block
 * offset using the destination map (index 1) of a migrating device.
 * Returns 0 for levels the translation does not support.
 */
static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
{
	struct imsm_map *map = get_imsm_map(dev, 1);
	__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
	__u32 strip = block / chunk;

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10: {
		/* mirrored levels: scale strips by the mirror domain count.
		 * NOTE(review): the '+ 1' rounds up before dividing by
		 * num_members -- presumably to land past the stripe
		 * boundary; confirm against the imsm layout spec.
		 */
		__u32 vol_strip = (strip * map->num_domains) + 1;
		__u32 vol_stripe = vol_strip / map->num_members;

		return vol_stripe * chunk + block % chunk;
	} case 5: {
		/* raid5: num_members - 1 data strips per stripe */
		__u32 stripe = strip / (map->num_members - 1);

		return stripe * chunk + block % chunk;
	}
	default:
		return 0;
	}
}
1782
static __u64 blocks_per_migr_unit(struct intel_super *super,
				  struct imsm_dev *dev)
{
	/* calculate the conversion factor between per member 'blocks'
	 * (md/{resync,rebuild}_start) and imsm migration units, return
	 * 0 for the 'not migrating' and 'unsupported migration' cases
	 */
	if (!dev->vol.migr_state)
		return 0;

	switch (migr_type(dev)) {
	case MIGR_GEN_MIGR: {
		/* general migration: the factor is stored directly in the
		 * migration record
		 */
		struct migr_record *migr_rec = super->migr_rec;
		return __le32_to_cpu(migr_rec->blocks_per_unit);
	}
	case MIGR_VERIFY:
	case MIGR_REPAIR:
	case MIGR_INIT: {
		struct imsm_map *map = get_imsm_map(dev, 0);
		__u32 stripes_per_unit;
		__u32 blocks_per_unit;
		__u32 parity_depth;
		__u32 migr_chunk;
		__u32 block_map;
		__u32 block_rel;
		__u32 segment;
		__u32 stripe;
		__u8  disks;

		/* yes, this is really the translation of migr_units to
		 * per-member blocks in the 'resync' case
		 */
		stripes_per_unit = num_stripes_per_unit_resync(dev);
		migr_chunk = migr_strip_blocks_resync(dev);
		disks = imsm_num_data_members(dev, 0);
		blocks_per_unit = stripes_per_unit * migr_chunk * disks;
		/* split the unit into whole stripes plus a remainder ... */
		stripe = __le32_to_cpu(map->blocks_per_strip) * disks;
		segment = blocks_per_unit / stripe;
		block_rel = blocks_per_unit - segment * stripe;
		/* ... then remap the remainder and add the per-segment
		 * parity/mirror overhead
		 */
		parity_depth = parity_segment_depth(dev);
		block_map = map_migr_block(dev, block_rel);
		return block_map + parity_depth * segment;
	}
	case MIGR_REBUILD: {
		__u32 stripes_per_unit;
		__u32 migr_chunk;

		stripes_per_unit = num_stripes_per_unit_rebuild(dev);
		migr_chunk = migr_strip_blocks_rebuild(dev);
		return migr_chunk * stripes_per_unit;
	}
	case MIGR_STATE_CHANGE:
	default:
		return 0;
	}
}
1839
1840 static int imsm_level_to_layout(int level)
1841 {
1842 switch (level) {
1843 case 0:
1844 case 1:
1845 return 0;
1846 case 5:
1847 case 6:
1848 return ALGORITHM_LEFT_ASYMMETRIC;
1849 case 10:
1850 return 0x102;
1851 }
1852 return UnSet;
1853 }
1854
1855 /*******************************************************************************
1856 * Function: read_imsm_migr_rec
1857 * Description: Function reads imsm migration record from last sector of disk
1858 * Parameters:
1859 * fd : disk descriptor
1860 * super : metadata info
1861 * Returns:
1862 * 0 : success,
1863 * -1 : fail
1864 ******************************************************************************/
1865 static int read_imsm_migr_rec(int fd, struct intel_super *super)
1866 {
1867 int ret_val = -1;
1868 unsigned long long dsize;
1869
1870 get_dev_size(fd, NULL, &dsize);
1871 if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
1872 fprintf(stderr,
1873 Name ": Cannot seek to anchor block: %s\n",
1874 strerror(errno));
1875 goto out;
1876 }
1877 if (read(fd, super->migr_rec_buf, 512) != 512) {
1878 fprintf(stderr,
1879 Name ": Cannot read migr record block: %s\n",
1880 strerror(errno));
1881 goto out;
1882 }
1883 ret_val = 0;
1884
1885 out:
1886 return ret_val;
1887 }
1888
/*******************************************************************************
 * Function:	load_imsm_migr_rec
 * Description: Function reads imsm migration record (it is stored at the last
 *		sector of disk)
 * Parameters:
 *	super	: imsm internal array info
 *	info	: general array info
 * Returns:
 *	 0 : success
 *	-1 : fail
 ******************************************************************************/
static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
{
	struct mdinfo *sd;
	struct dl *dl = NULL;
	char nm[30];
	int retval = -1;
	int fd = -1;

	/* prefer the kernel's view of the member disks when available ... */
	if (info) {
		for (sd = info->devs ; sd ; sd = sd->next) {
			/* read only from one of the first two slots */
			if ((sd->disk.raid_disk > 1) ||
			    (sd->disk.raid_disk < 0))
				continue;
			sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
			fd = dev_open(nm, O_RDONLY);
			if (fd >= 0)
				break;
		}
	}
	/* ... otherwise fall back to the disks attached to the container */
	if (fd < 0) {
		for (dl = super->disks; dl; dl = dl->next) {
			/* read only from one of the first two slots */
			if (dl->index > 1)
				continue;
			sprintf(nm, "%d:%d", dl->major, dl->minor);
			fd = dev_open(nm, O_RDONLY);
			if (fd >= 0)
				break;
		}
	}
	if (fd < 0)
		goto out;
	retval = read_imsm_migr_rec(fd, super);

 out:
	if (fd >= 0)
		close(fd);
	return retval;
}
1940
/*******************************************************************************
 * Function:	write_imsm_migr_rec
 * Description: Function writes imsm migration record
 *		(at the last sector of disk)
 * Parameters:
 *	st	: supertype (NOTE(review): the header previously said
 *		  'super'; the internal info is taken from st->sb)
 * Returns:
 *	 0 : success
 *	-1 : if fail
 ******************************************************************************/
static int write_imsm_migr_rec(struct supertype *st)
{
	struct intel_super *super = st->sb;
	unsigned long long dsize;
	char nm[30];
	int fd = -1;
	int retval = -1;
	struct dl *sd;

	for (sd = super->disks ; sd ; sd = sd->next) {
		/* write to 2 first slots only */
		if ((sd->index < 0) || (sd->index > 1))
			continue;
		sprintf(nm, "%d:%d", sd->major, sd->minor);
		fd = dev_open(nm, O_RDWR);
		if (fd < 0)
			continue;
		/* the record occupies the last 512 bytes of the device */
		get_dev_size(fd, NULL, &dsize);
		if (lseek64(fd, dsize - 512, SEEK_SET) < 0) {
			fprintf(stderr,
				Name ": Cannot seek to anchor block: %s\n",
				strerror(errno));
			goto out;
		}
		if (write(fd, super->migr_rec_buf, 512) != 512) {
			fprintf(stderr,
				Name ": Cannot write migr record block: %s\n",
				strerror(errno));
			goto out;
		}
		/* mark fd closed so the error path below won't close twice */
		close(fd);
		fd = -1;
	}

	retval = 0;
 out:
	if (fd >= 0)
		close(fd);
	return retval;
}
1991
1992 static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
1993 {
1994 struct intel_super *super = st->sb;
1995 struct migr_record *migr_rec = super->migr_rec;
1996 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
1997 struct imsm_map *map = get_imsm_map(dev, 0);
1998 struct imsm_map *prev_map = get_imsm_map(dev, 1);
1999 struct imsm_map *map_to_analyse = map;
2000 struct dl *dl;
2001 char *devname;
2002 unsigned int component_size_alligment;
2003 int map_disks = info->array.raid_disks;
2004
2005 memset(info, 0, sizeof(*info));
2006 if (prev_map)
2007 map_to_analyse = prev_map;
2008
2009 for (dl = super->disks; dl; dl = dl->next)
2010 if (dl->raiddisk == info->disk.raid_disk)
2011 break;
2012 info->container_member = super->current_vol;
2013 info->array.raid_disks = map->num_members;
2014 info->array.level = get_imsm_raid_level(map_to_analyse);
2015 info->array.layout = imsm_level_to_layout(info->array.level);
2016 info->array.md_minor = -1;
2017 info->array.ctime = 0;
2018 info->array.utime = 0;
2019 info->array.chunk_size =
2020 __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
2021 info->array.state = !dev->vol.dirty;
2022 info->custom_array_size = __le32_to_cpu(dev->size_high);
2023 info->custom_array_size <<= 32;
2024 info->custom_array_size |= __le32_to_cpu(dev->size_low);
2025 if (prev_map && map->map_state == prev_map->map_state) {
2026 info->reshape_active = 1;
2027 info->new_level = get_imsm_raid_level(map);
2028 info->new_layout = imsm_level_to_layout(info->new_level);
2029 info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
2030 info->delta_disks = map->num_members - prev_map->num_members;
2031 if (info->delta_disks) {
2032 /* this needs to be applied to every array
2033 * in the container.
2034 */
2035 info->reshape_active = 2;
2036 }
2037 /* We shape information that we give to md might have to be
2038 * modify to cope with md's requirement for reshaping arrays.
2039 * For example, when reshaping a RAID0, md requires it to be
2040 * presented as a degraded RAID4.
2041 * Also if a RAID0 is migrating to a RAID5 we need to specify
2042 * the array as already being RAID5, but the 'before' layout
2043 * is a RAID4-like layout.
2044 */
2045 switch (info->array.level) {
2046 case 0:
2047 switch(info->new_level) {
2048 case 0:
2049 /* conversion is happening as RAID4 */
2050 info->array.level = 4;
2051 info->array.raid_disks += 1;
2052 break;
2053 case 5:
2054 /* conversion is happening as RAID5 */
2055 info->array.level = 5;
2056 info->array.layout = ALGORITHM_PARITY_N;
2057 info->array.raid_disks += 1;
2058 info->delta_disks -= 1;
2059 break;
2060 default:
2061 /* FIXME error message */
2062 info->array.level = UnSet;
2063 break;
2064 }
2065 break;
2066 }
2067 } else {
2068 info->new_level = UnSet;
2069 info->new_layout = UnSet;
2070 info->new_chunk = info->array.chunk_size;
2071 info->delta_disks = 0;
2072 }
2073 info->disk.major = 0;
2074 info->disk.minor = 0;
2075 if (dl) {
2076 info->disk.major = dl->major;
2077 info->disk.minor = dl->minor;
2078 }
2079
2080 info->data_offset = __le32_to_cpu(map_to_analyse->pba_of_lba0);
2081 info->component_size =
2082 __le32_to_cpu(map_to_analyse->blocks_per_member);
2083
2084 /* check component size aligment
2085 */
2086 component_size_alligment =
2087 info->component_size % (info->array.chunk_size/512);
2088
2089 if (component_size_alligment &&
2090 (info->array.level != 1) && (info->array.level != UnSet)) {
2091 dprintf("imsm: reported component size alligned from %llu ",
2092 info->component_size);
2093 info->component_size -= component_size_alligment;
2094 dprintf("to %llu (%i).\n",
2095 info->component_size, component_size_alligment);
2096 }
2097
2098 memset(info->uuid, 0, sizeof(info->uuid));
2099 info->recovery_start = MaxSector;
2100
2101 info->reshape_progress = 0;
2102 info->resync_start = MaxSector;
2103 if (map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
2104 dev->vol.dirty) {
2105 info->resync_start = 0;
2106 }
2107 if (dev->vol.migr_state) {
2108 switch (migr_type(dev)) {
2109 case MIGR_REPAIR:
2110 case MIGR_INIT: {
2111 __u64 blocks_per_unit = blocks_per_migr_unit(super,
2112 dev);
2113 __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
2114
2115 info->resync_start = blocks_per_unit * units;
2116 break;
2117 }
2118 case MIGR_GEN_MIGR: {
2119 __u64 blocks_per_unit = blocks_per_migr_unit(super,
2120 dev);
2121 __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
2122 unsigned long long array_blocks;
2123 int used_disks;
2124
2125 info->reshape_progress = blocks_per_unit * units;
2126
2127 dprintf("IMSM: General Migration checkpoint : %llu "
2128 "(%llu) -> read reshape progress : %llu\n",
2129 units, blocks_per_unit, info->reshape_progress);
2130
2131 used_disks = imsm_num_data_members(dev, 1);
2132 if (used_disks > 0) {
2133 array_blocks = map->blocks_per_member *
2134 used_disks;
2135 /* round array size down to closest MB
2136 */
2137 info->custom_array_size = (array_blocks
2138 >> SECT_PER_MB_SHIFT)
2139 << SECT_PER_MB_SHIFT;
2140 }
2141 }
2142 case MIGR_VERIFY:
2143 /* we could emulate the checkpointing of
2144 * 'sync_action=check' migrations, but for now
2145 * we just immediately complete them
2146 */
2147 case MIGR_REBUILD:
2148 /* this is handled by container_content_imsm() */
2149 case MIGR_STATE_CHANGE:
2150 /* FIXME handle other migrations */
2151 default:
2152 /* we are not dirty, so... */
2153 info->resync_start = MaxSector;
2154 }
2155 }
2156
2157 strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
2158 info->name[MAX_RAID_SERIAL_LEN] = 0;
2159
2160 info->array.major_version = -1;
2161 info->array.minor_version = -2;
2162 devname = devnum2devname(st->container_dev);
2163 *info->text_version = '\0';
2164 if (devname)
2165 sprintf(info->text_version, "/%s/%d", devname, info->container_member);
2166 free(devname);
2167 info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
2168 uuid_from_super_imsm(st, info->uuid);
2169
2170 if (dmap) {
2171 int i, j;
2172 for (i=0; i<map_disks; i++) {
2173 dmap[i] = 0;
2174 if (i < info->array.raid_disks) {
2175 struct imsm_disk *dsk;
2176 j = get_imsm_disk_idx(dev, i, -1);
2177 dsk = get_imsm_disk(super, j);
2178 if (dsk && (dsk->status & CONFIGURED_DISK))
2179 dmap[i] = 1;
2180 }
2181 }
2182 }
2183 }
2184
2185 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed);
2186 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev);
2187
2188 static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
2189 {
2190 struct dl *d;
2191
2192 for (d = super->missing; d; d = d->next)
2193 if (d->index == index)
2194 return &d->disk;
2195 return NULL;
2196 }
2197
2198 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
2199 {
2200 struct intel_super *super = st->sb;
2201 struct imsm_disk *disk;
2202 int map_disks = info->array.raid_disks;
2203 int max_enough = -1;
2204 int i;
2205 struct imsm_super *mpb;
2206
2207 if (super->current_vol >= 0) {
2208 getinfo_super_imsm_volume(st, info, map);
2209 return;
2210 }
2211 memset(info, 0, sizeof(*info));
2212
2213 /* Set raid_disks to zero so that Assemble will always pull in valid
2214 * spares
2215 */
2216 info->array.raid_disks = 0;
2217 info->array.level = LEVEL_CONTAINER;
2218 info->array.layout = 0;
2219 info->array.md_minor = -1;
2220 info->array.ctime = 0; /* N/A for imsm */
2221 info->array.utime = 0;
2222 info->array.chunk_size = 0;
2223
2224 info->disk.major = 0;
2225 info->disk.minor = 0;
2226 info->disk.raid_disk = -1;
2227 info->reshape_active = 0;
2228 info->array.major_version = -1;
2229 info->array.minor_version = -2;
2230 strcpy(info->text_version, "imsm");
2231 info->safe_mode_delay = 0;
2232 info->disk.number = -1;
2233 info->disk.state = 0;
2234 info->name[0] = 0;
2235 info->recovery_start = MaxSector;
2236
2237 /* do we have the all the insync disks that we expect? */
2238 mpb = super->anchor;
2239
2240 for (i = 0; i < mpb->num_raid_devs; i++) {
2241 struct imsm_dev *dev = get_imsm_dev(super, i);
2242 int failed, enough, j, missing = 0;
2243 struct imsm_map *map;
2244 __u8 state;
2245
2246 failed = imsm_count_failed(super, dev);
2247 state = imsm_check_degraded(super, dev, failed);
2248 map = get_imsm_map(dev, dev->vol.migr_state);
2249
2250 /* any newly missing disks?
2251 * (catches single-degraded vs double-degraded)
2252 */
2253 for (j = 0; j < map->num_members; j++) {
2254 __u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
2255 __u32 idx = ord_to_idx(ord);
2256
2257 if (!(ord & IMSM_ORD_REBUILD) &&
2258 get_imsm_missing(super, idx)) {
2259 missing = 1;
2260 break;
2261 }
2262 }
2263
2264 if (state == IMSM_T_STATE_FAILED)
2265 enough = -1;
2266 else if (state == IMSM_T_STATE_DEGRADED &&
2267 (state != map->map_state || missing))
2268 enough = 0;
2269 else /* we're normal, or already degraded */
2270 enough = 1;
2271
2272 /* in the missing/failed disk case check to see
2273 * if at least one array is runnable
2274 */
2275 max_enough = max(max_enough, enough);
2276 }
2277 dprintf("%s: enough: %d\n", __func__, max_enough);
2278 info->container_enough = max_enough;
2279
2280 if (super->disks) {
2281 __u32 reserved = imsm_reserved_sectors(super, super->disks);
2282
2283 disk = &super->disks->disk;
2284 info->data_offset = __le32_to_cpu(disk->total_blocks) - reserved;
2285 info->component_size = reserved;
2286 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
2287 /* we don't change info->disk.raid_disk here because
2288 * this state will be finalized in mdmon after we have
2289 * found the 'most fresh' version of the metadata
2290 */
2291 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2292 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2293 }
2294
2295 /* only call uuid_from_super_imsm when this disk is part of a populated container,
2296 * ->compare_super may have updated the 'num_raid_devs' field for spares
2297 */
2298 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
2299 uuid_from_super_imsm(st, info->uuid);
2300 else
2301 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
2302
2303 /* I don't know how to compute 'map' on imsm, so use safe default */
2304 if (map) {
2305 int i;
2306 for (i = 0; i < map_disks; i++)
2307 map[i] = 1;
2308 }
2309
2310 }
2311
2312 /* allocates memory and fills disk in mdinfo structure
2313 * for each disk in array */
2314 struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
2315 {
2316 struct mdinfo *mddev = NULL;
2317 struct intel_super *super = st->sb;
2318 struct imsm_disk *disk;
2319 int count = 0;
2320 struct dl *dl;
2321 if (!super || !super->disks)
2322 return NULL;
2323 dl = super->disks;
2324 mddev = malloc(sizeof(*mddev));
2325 if (!mddev) {
2326 fprintf(stderr, Name ": Failed to allocate memory.\n");
2327 return NULL;
2328 }
2329 memset(mddev, 0, sizeof(*mddev));
2330 while (dl) {
2331 struct mdinfo *tmp;
2332 disk = &dl->disk;
2333 tmp = malloc(sizeof(*tmp));
2334 if (!tmp) {
2335 fprintf(stderr, Name ": Failed to allocate memory.\n");
2336 if (mddev)
2337 sysfs_free(mddev);
2338 return NULL;
2339 }
2340 memset(tmp, 0, sizeof(*tmp));
2341 if (mddev->devs)
2342 tmp->next = mddev->devs;
2343 mddev->devs = tmp;
2344 tmp->disk.number = count++;
2345 tmp->disk.major = dl->major;
2346 tmp->disk.minor = dl->minor;
2347 tmp->disk.state = is_configured(disk) ?
2348 (1 << MD_DISK_ACTIVE) : 0;
2349 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
2350 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
2351 tmp->disk.raid_disk = -1;
2352 dl = dl->next;
2353 }
2354 return mddev;
2355 }
2356
static int update_super_imsm(struct supertype *st, struct mdinfo *info,
			     char *update, char *devname, int verbose,
			     int uuid_set, char *homehost)
{
	/* For 'assemble' and 'force' we need to return non-zero if any
	 * change was made.  For others, the return value is ignored.
	 * Update options are:
	 * force-one : This device looks a bit old but needs to be included,
	 *        update age info appropriately.
	 * assemble: clear any 'faulty' flag to allow this device to
	 *            be assembled.
	 * force-array: Array is degraded but being forced, mark it clean
	 *         if that will be needed to assemble it.
	 *
	 * newdev:  not used ????
	 * grow:  Array has gained a new device - this is currently for
	 *        linear only
	 * resync: mark as dirty so a resync will happen.
	 * name:  update the name - preserving the homehost
	 * uuid:  Change the uuid of the array to match watch is given
	 *
	 * Following are not relevant for this imsm:
	 * sparc2.2 : update from old dodgey metadata
	 * super-minor: change the preferred_minor number
	 * summaries:  update redundant counters.
	 * homehost:  update the recorded homehost
	 * _reshape_progress: record new reshape_progress position.
	 */
	int rv = 1;
	struct intel_super *super = st->sb;
	struct imsm_super *mpb;

	/* we can only update container info */
	if (!super || super->current_vol >= 0 || !super->anchor)
		return 1;

	mpb = super->anchor;

	/* "uuid" updates come in three flavors, checked in order:
	 * 1) uuid_set but no payload yet -> signal caller (-1) to retry
	 *    once update_private carries the chosen family number;
	 * 2) uuid_set with payload -> apply the stored orig_family_num;
	 * 3) no explicit uuid -> derive a new family number from the
	 *    random bytes in info->uuid and hand it back via
	 *    update_private for subsequent disks.
	 */
	if (strcmp(update, "uuid") == 0 && uuid_set && !info->update_private)
		rv = -1;
	else if (strcmp(update, "uuid") == 0 && uuid_set && info->update_private) {
		mpb->orig_family_num = *((__u32 *) info->update_private);
		rv = 0;
	} else if (strcmp(update, "uuid") == 0) {
		__u32 *new_family = malloc(sizeof(*new_family));

		/* update orig_family_number with the incoming random
		 * data, report the new effective uuid, and store the
		 * new orig_family_num for future updates.
		 */
		if (new_family) {
			memcpy(&mpb->orig_family_num, info->uuid, sizeof(__u32));
			uuid_from_super_imsm(st, info->uuid);
			*new_family = mpb->orig_family_num;
			info->update_private = new_family;
			rv = 0;
		}
	} else if (strcmp(update, "assemble") == 0)
		rv = 0;
	else
		rv = -1;

	/* successful update? recompute checksum */
	if (rv == 0)
		mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));

	return rv;
}
2425
2426 static size_t disks_to_mpb_size(int disks)
2427 {
2428 size_t size;
2429
2430 size = sizeof(struct imsm_super);
2431 size += (disks - 1) * sizeof(struct imsm_disk);
2432 size += 2 * sizeof(struct imsm_dev);
2433 /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev */
2434 size += (4 - 2) * sizeof(struct imsm_map);
2435 /* 4 possible disk_ord_tbl's */
2436 size += 4 * (disks - 1) * sizeof(__u32);
2437
2438 return size;
2439 }
2440
2441 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize)
2442 {
2443 if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
2444 return 0;
2445
2446 return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
2447 }
2448
2449 static void free_devlist(struct intel_super *super)
2450 {
2451 struct intel_dev *dv;
2452
2453 while (super->devlist) {
2454 dv = super->devlist->next;
2455 free(super->devlist->dev);
2456 free(super->devlist);
2457 super->devlist = dv;
2458 }
2459 }
2460
/* Copy a raid device description from 'src' to 'dest'.  The copy is
 * sized with sizeof_imsm_dev(src, 0), i.e. the imsm_dev header plus
 * its first map only -- presumably a second (migration) map is not
 * needed by callers; TODO confirm against sizeof_imsm_dev().
 * 'dest' must be large enough for that size.
 */
static void imsm_copy_dev(struct imsm_dev *dest, struct imsm_dev *src)
{
	memcpy(dest, src, sizeof_imsm_dev(src, 0));
}
2465
2466 static int compare_super_imsm(struct supertype *st, struct supertype *tst)
2467 {
2468 /*
2469 * return:
2470 * 0 same, or first was empty, and second was copied
2471 * 1 second had wrong number
2472 * 2 wrong uuid
2473 * 3 wrong other info
2474 */
2475 struct intel_super *first = st->sb;
2476 struct intel_super *sec = tst->sb;
2477
2478 if (!first) {
2479 st->sb = tst->sb;
2480 tst->sb = NULL;
2481 return 0;
2482 }
2483 /* in platform dependent environment test if the disks
2484 * use the same Intel hba
2485 */
2486 if (!check_env("IMSM_NO_PLATFORM")) {
2487 if (!first->hba || !sec->hba ||
2488 (first->hba->type != sec->hba->type)) {
2489 fprintf(stderr,
2490 "HBAs of devices does not match %s != %s\n",
2491 first->hba ? get_sys_dev_type(first->hba->type) : NULL,
2492 sec->hba ? get_sys_dev_type(sec->hba->type) : NULL);
2493 return 3;
2494 }
2495 }
2496
2497 /* if an anchor does not have num_raid_devs set then it is a free
2498 * floating spare
2499 */
2500 if (first->anchor->num_raid_devs > 0 &&
2501 sec->anchor->num_raid_devs > 0) {
2502 /* Determine if these disks might ever have been
2503 * related. Further disambiguation can only take place
2504 * in load_super_imsm_all
2505 */
2506 __u32 first_family = first->anchor->orig_family_num;
2507 __u32 sec_family = sec->anchor->orig_family_num;
2508
2509 if (memcmp(first->anchor->sig, sec->anchor->sig,
2510 MAX_SIGNATURE_LENGTH) != 0)
2511 return 3;
2512
2513 if (first_family == 0)
2514 first_family = first->anchor->family_num;
2515 if (sec_family == 0)
2516 sec_family = sec->anchor->family_num;
2517
2518 if (first_family != sec_family)
2519 return 3;
2520
2521 }
2522
2523
2524 /* if 'first' is a spare promote it to a populated mpb with sec's
2525 * family number
2526 */
2527 if (first->anchor->num_raid_devs == 0 &&
2528 sec->anchor->num_raid_devs > 0) {
2529 int i;
2530 struct intel_dev *dv;
2531 struct imsm_dev *dev;
2532
2533 /* we need to copy raid device info from sec if an allocation
2534 * fails here we don't associate the spare
2535 */
2536 for (i = 0; i < sec->anchor->num_raid_devs; i++) {
2537 dv = malloc(sizeof(*dv));
2538 if (!dv)
2539 break;
2540 dev = malloc(sizeof_imsm_dev(get_imsm_dev(sec, i), 1));
2541 if (!dev) {
2542 free(dv);
2543 break;
2544 }
2545 dv->dev = dev;
2546 dv->index = i;
2547 dv->next = first->devlist;
2548 first->devlist = dv;
2549 }
2550 if (i < sec->anchor->num_raid_devs) {
2551 /* allocation failure */
2552 free_devlist(first);
2553 fprintf(stderr, "imsm: failed to associate spare\n");
2554 return 3;
2555 }
2556 first->anchor->num_raid_devs = sec->anchor->num_raid_devs;
2557 first->anchor->orig_family_num = sec->anchor->orig_family_num;
2558 first->anchor->family_num = sec->anchor->family_num;
2559 memcpy(first->anchor->sig, sec->anchor->sig, MAX_SIGNATURE_LENGTH);
2560 for (i = 0; i < sec->anchor->num_raid_devs; i++)
2561 imsm_copy_dev(get_imsm_dev(first, i), get_imsm_dev(sec, i));
2562 }
2563
2564 return 0;
2565 }
2566
2567 static void fd2devname(int fd, char *name)
2568 {
2569 struct stat st;
2570 char path[256];
2571 char dname[PATH_MAX];
2572 char *nm;
2573 int rv;
2574
2575 name[0] = '\0';
2576 if (fstat(fd, &st) != 0)
2577 return;
2578 sprintf(path, "/sys/dev/block/%d:%d",
2579 major(st.st_rdev), minor(st.st_rdev));
2580
2581 rv = readlink(path, dname, sizeof(dname));
2582 if (rv <= 0)
2583 return;
2584
2585 dname[rv] = '\0';
2586 nm = strrchr(dname, '/');
2587 nm++;
2588 snprintf(name, MAX_RAID_SERIAL_LEN, "/dev/%s", nm);
2589 }
2590
2591 extern int scsi_get_serial(int fd, void *buf, size_t buf_len);
2592
2593 static int imsm_read_serial(int fd, char *devname,
2594 __u8 serial[MAX_RAID_SERIAL_LEN])
2595 {
2596 unsigned char scsi_serial[255];
2597 int rv;
2598 int rsp_len;
2599 int len;
2600 char *dest;
2601 char *src;
2602 char *rsp_buf;
2603 int i;
2604
2605 memset(scsi_serial, 0, sizeof(scsi_serial));
2606
2607 rv = scsi_get_serial(fd, scsi_serial, sizeof(scsi_serial));
2608
2609 if (rv && check_env("IMSM_DEVNAME_AS_SERIAL")) {
2610 memset(serial, 0, MAX_RAID_SERIAL_LEN);
2611 fd2devname(fd, (char *) serial);
2612 return 0;
2613 }
2614
2615 if (rv != 0) {
2616 if (devname)
2617 fprintf(stderr,
2618 Name ": Failed to retrieve serial for %s\n",
2619 devname);
2620 return rv;
2621 }
2622
2623 rsp_len = scsi_serial[3];
2624 if (!rsp_len) {
2625 if (devname)
2626 fprintf(stderr,
2627 Name ": Failed to retrieve serial for %s\n",
2628 devname);
2629 return 2;
2630 }
2631 rsp_buf = (char *) &scsi_serial[4];
2632
2633 /* trim all whitespace and non-printable characters and convert
2634 * ':' to ';'
2635 */
2636 for (i = 0, dest = rsp_buf; i < rsp_len; i++) {
2637 src = &rsp_buf[i];
2638 if (*src > 0x20) {
2639 /* ':' is reserved for use in placeholder serial
2640 * numbers for missing disks
2641 */
2642 if (*src == ':')
2643 *dest++ = ';';
2644 else
2645 *dest++ = *src;
2646 }
2647 }
2648 len = dest - rsp_buf;
2649 dest = rsp_buf;
2650
2651 /* truncate leading characters */
2652 if (len > MAX_RAID_SERIAL_LEN) {
2653 dest += len - MAX_RAID_SERIAL_LEN;
2654 len = MAX_RAID_SERIAL_LEN;
2655 }
2656
2657 memset(serial, 0, MAX_RAID_SERIAL_LEN);
2658 memcpy(serial, dest, len);
2659
2660 return 0;
2661 }
2662
/* Compare two serial numbers, bounded at MAX_RAID_SERIAL_LEN because
 * the metadata serial fields are fixed width and not necessarily NUL
 * terminated.  Returns the usual <0/0/>0 strncmp result.
 */
static int serialcmp(__u8 *s1, __u8 *s2)
{
	return strncmp((char *) s1, (char *) s2, MAX_RAID_SERIAL_LEN);
}
2667
/* Copy a serial into a fixed-width metadata field.  strncpy is the
 * intended tool here: it stops at a NUL in 'src', zero-pads the rest,
 * and deliberately leaves the field unterminated when 'src' uses all
 * MAX_RAID_SERIAL_LEN bytes.
 */
static void serialcpy(__u8 *dest, __u8 *src)
{
	strncpy((char *) dest, (char *) src, MAX_RAID_SERIAL_LEN);
}
2672
2673 #ifndef MDASSEMBLE
2674 static struct dl *serial_to_dl(__u8 *serial, struct intel_super *super)
2675 {
2676 struct dl *dl;
2677
2678 for (dl = super->disks; dl; dl = dl->next)
2679 if (serialcmp(dl->serial, serial) == 0)
2680 break;
2681
2682 return dl;
2683 }
2684 #endif
2685
2686 static struct imsm_disk *
2687 __serial_to_disk(__u8 *serial, struct imsm_super *mpb, int *idx)
2688 {
2689 int i;
2690
2691 for (i = 0; i < mpb->num_disks; i++) {
2692 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
2693
2694 if (serialcmp(disk->serial, serial) == 0) {
2695 if (idx)
2696 *idx = i;
2697 return disk;
2698 }
2699 }
2700
2701 return NULL;
2702 }
2703
2704 static int
2705 load_imsm_disk(int fd, struct intel_super *super, char *devname, int keep_fd)
2706 {
2707 struct imsm_disk *disk;
2708 struct dl *dl;
2709 struct stat stb;
2710 int rv;
2711 char name[40];
2712 __u8 serial[MAX_RAID_SERIAL_LEN];
2713
2714 rv = imsm_read_serial(fd, devname, serial);
2715
2716 if (rv != 0)
2717 return 2;
2718
2719 dl = calloc(1, sizeof(*dl));
2720 if (!dl) {
2721 if (devname)
2722 fprintf(stderr,
2723 Name ": failed to allocate disk buffer for %s\n",
2724 devname);
2725 return 2;
2726 }
2727
2728 fstat(fd, &stb);
2729 dl->major = major(stb.st_rdev);
2730 dl->minor = minor(stb.st_rdev);
2731 dl->next = super->disks;
2732 dl->fd = keep_fd ? fd : -1;
2733 assert(super->disks == NULL);
2734 super->disks = dl;
2735 serialcpy(dl->serial, serial);
2736 dl->index = -2;
2737 dl->e = NULL;
2738 fd2devname(fd, name);
2739 if (devname)
2740 dl->devname = strdup(devname);
2741 else
2742 dl->devname = strdup(name);
2743
2744 /* look up this disk's index in the current anchor */
2745 disk = __serial_to_disk(dl->serial, super->anchor, &dl->index);
2746 if (disk) {
2747 dl->disk = *disk;
2748 /* only set index on disks that are a member of a
2749 * populated contianer, i.e. one with raid_devs
2750 */
2751 if (is_failed(&dl->disk))
2752 dl->index = -2;
2753 else if (is_spare(&dl->disk))
2754 dl->index = -1;
2755 }
2756
2757 return 0;
2758 }
2759
2760 #ifndef MDASSEMBLE
2761 /* When migrating map0 contains the 'destination' state while map1
2762 * contains the current state. When not migrating map0 contains the
2763 * current state. This routine assumes that map[0].map_state is set to
2764 * the current array state before being called.
2765 *
2766 * Migration is indicated by one of the following states
2767 * 1/ Idle (migr_state=0 map0state=normal||unitialized||degraded||failed)
2768 * 2/ Initialize (migr_state=1 migr_type=MIGR_INIT map0state=normal
2769 * map1state=unitialized)
2770 * 3/ Repair (Resync) (migr_state=1 migr_type=MIGR_REPAIR map0state=normal
2771 * map1state=normal)
2772 * 4/ Rebuild (migr_state=1 migr_type=MIGR_REBUILD map0state=normal
2773 * map1state=degraded)
2774 * 5/ Migration (mig_state=1 migr_type=MIGR_GEN_MIGR map0state=normal
2775 * map1state=normal)
2776 */
/* Begin a migration on 'dev': snapshot the current map into map[1],
 * record the migration type, reset the checkpoint, and set map[0] to
 * the requested end state (see the state table in the comment above
 * for the map0/map1 conventions).
 */
static void migrate(struct imsm_dev *dev, struct intel_super *super,
		    __u8 to_state, int migr_type)
{
	struct imsm_map *dest;
	struct imsm_map *src = get_imsm_map(dev, 0);

	dev->vol.migr_state = 1;
	set_migr_type(dev, migr_type);
	dev->vol.curr_migr_unit = 0;	/* checkpoint restarts at zero */
	dest = get_imsm_map(dev, 1);

	/* duplicate and then set the target end state in map[0] */
	memcpy(dest, src, sizeof_imsm_map(src));
	if ((migr_type == MIGR_REBUILD) ||
	    (migr_type == MIGR_GEN_MIGR)) {
		__u32 ord;
		int i;

		/* rewrite each map[0] ord entry through ord_to_idx() --
		 * presumably stripping flag bits such as IMSM_ORD_REBUILD;
		 * the saved copy in map[1] keeps the originals so
		 * end_migration() can merge them back (TODO confirm)
		 */
		for (i = 0; i < src->num_members; i++) {
			ord = __le32_to_cpu(src->disk_ord_tbl[i]);
			set_imsm_ord_tbl_ent(src, i, ord_to_idx(ord));
		}
	}

	if (migr_type == MIGR_GEN_MIGR)
		/* Clear migration record */
		memset(super->migr_rec, 0, sizeof(struct migr_record));

	src->map_state = to_state;
}
2807
/* Finish a migration on 'dev': fold any ord flags preserved in the
 * previous map back into map[0], clear the migration bookkeeping, and
 * record the final map state.
 */
static void end_migration(struct imsm_dev *dev, __u8 map_state)
{
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
	int i, j;

	/* merge any IMSM_ORD_REBUILD bits that were not successfully
	 * completed in the last migration.
	 *
	 * FIXME add support for raid-level-migration
	 */
	for (i = 0; i < prev->num_members; i++)
		for (j = 0; j < map->num_members; j++)
			/* during online capacity expansion
			 * disks position can be changed if takeover is used
			 */
			if (ord_to_idx(map->disk_ord_tbl[j]) ==
			    ord_to_idx(prev->disk_ord_tbl[i])) {
				/* same disk in both maps: OR in the old
				 * entry so its flag bits survive
				 */
				map->disk_ord_tbl[j] |= prev->disk_ord_tbl[i];
				break;
			}

	/* migration over: return to the single-map idle state */
	dev->vol.migr_state = 0;
	dev->vol.migr_type = 0;
	dev->vol.curr_migr_unit = 0;
	map->map_state = map_state;
}
2835 #endif
2836
/* Import each raid device from the anchor into super->devlist (each in
 * its own buffer), and grow super->buf so the metadata could hold every
 * volume in the two-map migrating state.  Returns 0 on success, 1 on
 * allocation failure.
 */
static int parse_raid_devices(struct intel_super *super)
{
	int i;
	struct imsm_dev *dev_new;
	size_t len, len_migr;
	size_t max_len = 0;
	size_t space_needed = 0;
	struct imsm_super *mpb = super->anchor;

	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);
		struct intel_dev *dv;

		/* size of this device without / with a second map */
		len = sizeof_imsm_dev(dev_iter, 0);
		len_migr = sizeof_imsm_dev(dev_iter, 1);
		if (len_migr > len)
			space_needed += len_migr - len;

		dv = malloc(sizeof(*dv));
		if (!dv)
			return 1;
		/* NOTE(review): each copy buffer is sized by the running
		 * maximum of len_migr, so devices seen after a larger one
		 * get extra slack (and space_needed grows again) -- looks
		 * intentional but worth confirming
		 */
		if (max_len < len_migr)
			max_len = len_migr;
		if (max_len > len_migr)
			space_needed += max_len - len_migr;
		dev_new = malloc(max_len);
		if (!dev_new) {
			free(dv);
			return 1;
		}
		imsm_copy_dev(dev_new, dev_iter);
		dv->dev = dev_new;
		dv->index = i;
		dv->next = super->devlist;
		super->devlist = dv;
	}

	/* ensure that super->buf is large enough when all raid devices
	 * are migrating
	 */
	if (__le32_to_cpu(mpb->mpb_size) + space_needed > super->len) {
		void *buf;

		len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + space_needed, 512);
		if (posix_memalign(&buf, 512, len) != 0)
			return 1;

		memcpy(buf, super->buf, super->len);
		memset(buf + super->len, 0, len - super->len);
		free(super->buf);
		super->buf = buf;
		super->len = len;
	}

	return 0;
}
2893
2894 /* retrieve a pointer to the bbm log which starts after all raid devices */
2895 struct bbm_log *__get_imsm_bbm_log(struct imsm_super *mpb)
2896 {
2897 void *ptr = NULL;
2898
2899 if (__le32_to_cpu(mpb->bbm_log_size)) {
2900 ptr = mpb;
2901 ptr += mpb->mpb_size - __le32_to_cpu(mpb->bbm_log_size);
2902 }
2903
2904 return ptr;
2905 }
2906
2907 /*******************************************************************************
2908 * Function: check_mpb_migr_compatibility
2909 * Description: Function checks for unsupported migration features:
2910 * - migration optimization area (pba_of_lba0)
2911 * - descending reshape (ascending_migr)
2912 * Parameters:
2913 * super : imsm metadata information
2914 * Returns:
2915 * 0 : migration is compatible
2916 * -1 : migration is not compatible
2917 ******************************************************************************/
int check_mpb_migr_compatibility(struct intel_super *super)
{
	struct imsm_map *map0, *map1;
	/* one shared migration record for the container */
	struct migr_record *migr_rec = super->migr_rec;
	int i;

	/* scan every raid device for a general migration in progress */
	for (i = 0; i < super->anchor->num_raid_devs; i++) {
		struct imsm_dev *dev_iter = __get_imsm_dev(super->anchor, i);

		if (dev_iter &&
		    dev_iter->vol.migr_state == 1 &&
		    dev_iter->vol.migr_type == MIGR_GEN_MIGR) {
			/* This device is migrating */
			map0 = get_imsm_map(dev_iter, 0);
			map1 = get_imsm_map(dev_iter, 1);
			/* differing start LBAs between the two maps mean
			 * the optimization area is in use
			 */
			if (map0->pba_of_lba0 != map1->pba_of_lba0)
				/* migration optimization area was used */
				return -1;
			if (migr_rec->ascending_migr == 0
			    && migr_rec->dest_depth_per_unit > 0)
				/* descending reshape not supported yet */
				return -1;
		}
	}
	return 0;
}
2944
2945 static void __free_imsm(struct intel_super *super, int free_disks);
2946
2947 /* load_imsm_mpb - read matrix metadata
2948 * allocates super->mpb to be freed by free_imsm
2949 */
2950 static int load_imsm_mpb(int fd, struct intel_super *super, char *devname)
2951 {
2952 unsigned long long dsize;
2953 unsigned long long sectors;
2954 struct stat;
2955 struct imsm_super *anchor;
2956 __u32 check_sum;
2957
2958 get_dev_size(fd, NULL, &dsize);
2959 if (dsize < 1024) {
2960 if (devname)
2961 fprintf(stderr,
2962 Name ": %s: device to small for imsm\n",
2963 devname);
2964 return 1;
2965 }
2966
2967 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0) {
2968 if (devname)
2969 fprintf(stderr,
2970 Name ": Cannot seek to anchor block on %s: %s\n",
2971 devname, strerror(errno));
2972 return 1;
2973 }
2974
2975 if (posix_memalign((void**)&anchor, 512, 512) != 0) {
2976 if (devname)
2977 fprintf(stderr,
2978 Name ": Failed to allocate imsm anchor buffer"
2979 " on %s\n", devname);
2980 return 1;
2981 }
2982 if (read(fd, anchor, 512) != 512) {
2983 if (devname)
2984 fprintf(stderr,
2985 Name ": Cannot read anchor block on %s: %s\n",
2986 devname, strerror(errno));
2987 free(anchor);
2988 return 1;
2989 }
2990
2991 if (strncmp((char *) anchor->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0) {
2992 if (devname)
2993 fprintf(stderr,
2994 Name ": no IMSM anchor on %s\n", devname);
2995 free(anchor);
2996 return 2;
2997 }
2998
2999 __free_imsm(super, 0);
3000 /* reload capability and hba */
3001
3002 /* capability and hba must be updated with new super allocation */
3003 find_intel_hba_capability(fd, super, devname);
3004 super->len = ROUND_UP(anchor->mpb_size, 512);
3005 if (posix_memalign(&super->buf, 512, super->len) != 0) {
3006 if (devname)
3007 fprintf(stderr,
3008 Name ": unable to allocate %zu byte mpb buffer\n",
3009 super->len);
3010 free(anchor);
3011 return 2;
3012 }
3013 memcpy(super->buf, anchor, 512);
3014
3015 sectors = mpb_sectors(anchor) - 1;
3016 free(anchor);
3017
3018 if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
3019 fprintf(stderr, Name
3020 ": %s could not allocate migr_rec buffer\n", __func__);
3021 free(super->buf);
3022 return 2;
3023 }
3024
3025 if (!sectors) {
3026 check_sum = __gen_imsm_checksum(super->anchor);
3027 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
3028 if (devname)
3029 fprintf(stderr,
3030 Name ": IMSM checksum %x != %x on %s\n",
3031 check_sum,
3032 __le32_to_cpu(super->anchor->check_sum),
3033 devname);
3034 return 2;
3035 }
3036
3037 return 0;
3038 }
3039
3040 /* read the extended mpb */
3041 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0) {
3042 if (devname)
3043 fprintf(stderr,
3044 Name ": Cannot seek to extended mpb on %s: %s\n",
3045 devname, strerror(errno));
3046 return 1;
3047 }
3048
3049 if ((unsigned)read(fd, super->buf + 512, super->len - 512) != super->len - 512) {
3050 if (devname)
3051 fprintf(stderr,
3052 Name ": Cannot read extended mpb on %s: %s\n",
3053 devname, strerror(errno));
3054 return 2;
3055 }
3056
3057 check_sum = __gen_imsm_checksum(super->anchor);
3058 if (check_sum != __le32_to_cpu(super->anchor->check_sum)) {
3059 if (devname)
3060 fprintf(stderr,
3061 Name ": IMSM checksum %x != %x on %s\n",
3062 check_sum, __le32_to_cpu(super->anchor->check_sum),
3063 devname);
3064 return 3;
3065 }
3066
3067 /* FIXME the BBM log is disk specific so we cannot use this global
3068 * buffer for all disks. Ok for now since we only look at the global
3069 * bbm_log_size parameter to gate assembly
3070 */
3071 super->bbm_log = __get_imsm_bbm_log(super->anchor);
3072
3073 return 0;
3074 }
3075
3076 static int read_imsm_migr_rec(int fd, struct intel_super *super);
3077
/* Convenience wrapper: read the mpb from 'fd', register the disk, then
 * import the raid devices.  Stops at the first failing step and returns
 * its error code; 0 when everything succeeded.
 */
static int
load_and_parse_mpb(int fd, struct intel_super *super, char *devname, int keep_fd)
{
	int err = load_imsm_mpb(fd, super, devname);

	if (!err)
		err = load_imsm_disk(fd, super, devname, keep_fd);
	if (!err)
		err = parse_raid_devices(super);

	return err;
}
3093
3094 static void __free_imsm_disk(struct dl *d)
3095 {
3096 if (d->fd >= 0)
3097 close(d->fd);
3098 if (d->devname)
3099 free(d->devname);
3100 if (d->e)
3101 free(d->e);
3102 free(d);
3103
3104 }
3105
3106 static void free_imsm_disks(struct intel_super *super)
3107 {
3108 struct dl *d;
3109
3110 while (super->disks) {
3111 d = super->disks;
3112 super->disks = d->next;
3113 __free_imsm_disk(d);
3114 }
3115 while (super->disk_mgmt_list) {
3116 d = super->disk_mgmt_list;
3117 super->disk_mgmt_list = d->next;
3118 __free_imsm_disk(d);
3119 }
3120 while (super->missing) {
3121 d = super->missing;
3122 super->missing = d->next;
3123 __free_imsm_disk(d);
3124 }
3125
3126 }
3127
3128 /* free all the pieces hanging off of a super pointer */
3129 static void __free_imsm(struct intel_super *super, int free_disks)
3130 {
3131 struct intel_hba *elem, *next;
3132
3133 if (super->buf) {
3134 free(super->buf);
3135 super->buf = NULL;
3136 }
3137 /* unlink capability description */
3138 super->orom = NULL;
3139 if (super->migr_rec_buf) {
3140 free(super->migr_rec_buf);
3141 super->migr_rec_buf = NULL;
3142 }
3143 if (free_disks)
3144 free_imsm_disks(super);
3145 free_devlist(super);
3146 elem = super->hba;
3147 while (elem) {
3148 if (elem->path)
3149 free((void *)elem->path);
3150 next = elem->next;
3151 free(elem);
3152 elem = next;
3153 }
3154 super->hba = NULL;
3155 }
3156
/* Release an intel_super and everything attached to it, including the
 * disk lists (free_disks=1), then the struct itself.
 */
static void free_imsm(struct intel_super *super)
{
	__free_imsm(super, 1);
	free(super);
}
3162
3163 static void free_super_imsm(struct supertype *st)
3164 {
3165 struct intel_super *super = st->sb;
3166
3167 if (!super)
3168 return;
3169
3170 free_imsm(super);
3171 st->sb = NULL;
3172 }
3173
3174 static struct intel_super *alloc_super(void)
3175 {
3176 struct intel_super *super = malloc(sizeof(*super));
3177
3178 if (super) {
3179 memset(super, 0, sizeof(*super));
3180 super->current_vol = -1;
3181 super->create_offset = ~((__u32 ) 0);
3182 }
3183 return super;
3184 }
3185
3186 /*
3187 * find and allocate hba and OROM/EFI based on valid fd of RAID component device
3188 */
static int find_intel_hba_capability(int fd, struct intel_super *super, char *devname)
{
	struct sys_dev *hba_name;
	int rv = 0;

	/* platform-independent mode (or no usable fd): skip discovery */
	if ((fd < 0) || check_env("IMSM_NO_PLATFORM")) {
		super->orom = NULL;
		super->hba = NULL;
		return 0;
	}
	hba_name = find_disk_attached_hba(fd, NULL);
	if (!hba_name) {
		if (devname)
			fprintf(stderr,
				Name ": %s is not attached to Intel(R) RAID controller.\n",
				devname);
		return 1;
	}
	rv = attach_hba_to_super(super, hba_name);
	if (rv == 2) {
		/* rv==2: the disk's hba conflicts with the one already
		 * attached to this container -- explain which is which
		 */
		if (devname) {
			struct intel_hba *hba = super->hba;

			fprintf(stderr, Name ": %s is attached to Intel(R) %s RAID "
				"controller (%s),\n"
				" but the container is assigned to Intel(R) "
				"%s RAID controller (",
				devname,
				hba_name->path,
				hba_name->pci_id ? : "Err!",
				get_sys_dev_type(hba_name->type));

			/* list every controller already bound to the container */
			while (hba) {
				fprintf(stderr, "%s", hba->pci_id ? : "Err!");
				if (hba->next)
					fprintf(stderr, ", ");
				hba = hba->next;
			}

			fprintf(stderr, ").\n"
				" Mixing devices attached to different controllers "
				"is not allowed.\n");
		}
		free_sys_dev(&hba_name);
		return 2;
	}
	super->orom = find_imsm_capability(hba_name->type);
	free_sys_dev(&hba_name);
	if (!super->orom)
		return 3;
	return 0;
}
3241
3242 #ifndef MDASSEMBLE
3243 /* find_missing - helper routine for load_super_imsm_all that identifies
3244 * disks that have disappeared from the system. This routine relies on
3245 * the mpb being uptodate, which it is at load time.
3246 */
3247 static int find_missing(struct intel_super *super)
3248 {
3249 int i;
3250 struct imsm_super *mpb = super->anchor;
3251 struct dl *dl;
3252 struct imsm_disk *disk;
3253
3254 for (i = 0; i < mpb->num_disks; i++) {
3255 disk = __get_imsm_disk(mpb, i);
3256 dl = serial_to_dl(disk->serial, super);
3257 if (dl)
3258 continue;
3259
3260 dl = malloc(sizeof(*dl));
3261 if (!dl)
3262 return 1;
3263 dl->major = 0;
3264 dl->minor = 0;
3265 dl->fd = -1;
3266 dl->devname = strdup("missing");
3267 dl->index = i;
3268 serialcpy(dl->serial, disk->serial);
3269 dl->disk = *disk;
3270 dl->e = NULL;
3271 dl->next = super->missing;
3272 super->missing = dl;
3273 }
3274
3275 return 0;
3276 }
3277
3278 static struct intel_disk *disk_list_get(__u8 *serial, struct intel_disk *disk_list)
3279 {
3280 struct intel_disk *idisk = disk_list;
3281
3282 while (idisk) {
3283 if (serialcmp(idisk->disk.serial, serial) == 0)
3284 break;
3285 idisk = idisk->next;
3286 }
3287
3288 return idisk;
3289 }
3290
/* __prep_thunderdome - merge one disk's metadata view ('super') into the
 * candidate table used to pick the authoritative mpb for a container.
 *
 * 'table'/'tbl_size' hold one intel_super per metadata family seen so
 * far; 'disk_list' is the merged list of imsm_disk records accumulated
 * across all candidates.  Returns the (possibly grown) table size, or
 * -1 on allocation failure.
 */
static int __prep_thunderdome(struct intel_super **table, int tbl_size,
			      struct intel_super *super,
			      struct intel_disk **disk_list)
{
	struct imsm_disk *d = &super->disks->disk;
	struct imsm_super *mpb = super->anchor;
	int i, j;

	for (i = 0; i < tbl_size; i++) {
		struct imsm_super *tbl_mpb = table[i]->anchor;
		struct imsm_disk *tbl_d = &table[i]->disks->disk;

		if (tbl_mpb->family_num == mpb->family_num) {
			/* identical checksum => identical metadata, nothing
			 * to arbitrate; keep the existing table entry */
			if (tbl_mpb->check_sum == mpb->check_sum) {
				dprintf("%s: mpb from %d:%d matches %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);
				break;
			}

			/* prefer the newer generation, but never let an
			 * unconfigured disk's view displace a configured one */
			if (((is_configured(d) && !is_configured(tbl_d)) ||
			     is_configured(d) == is_configured(tbl_d)) &&
			    tbl_mpb->generation_num < mpb->generation_num) {
				/* current version of the mpb is a
				 * better candidate than the one in
				 * super_table, but copy over "cross
				 * generational" status
				 */
				struct intel_disk *idisk;

				dprintf("%s: mpb from %d:%d replaces %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);

				idisk = disk_list_get(tbl_d->serial, *disk_list);
				if (idisk && is_failed(&idisk->disk))
					tbl_d->status |= FAILED_DISK;
				break;
			} else {
				struct intel_disk *idisk;
				struct imsm_disk *disk;

				/* tbl_mpb is more up to date, but copy
				 * over cross generational status before
				 * returning
				 */
				disk = __serial_to_disk(d->serial, mpb, NULL);
				if (disk && is_failed(disk))
					d->status |= FAILED_DISK;

				idisk = disk_list_get(d->serial, *disk_list);
				if (idisk) {
					idisk->owner = i;
					if (disk && is_configured(disk))
						idisk->disk.status |= CONFIGURED_DISK;
				}

				dprintf("%s: mpb from %d:%d prefer %d:%d\n",
					__func__, super->disks->major,
					super->disks->minor,
					table[i]->disks->major,
					table[i]->disks->minor);

				/* table unchanged; 'super' was not added */
				return tbl_size;
			}
		}
	}

	/* no family match: append; family match: replace the entry in place */
	if (i >= tbl_size)
		table[tbl_size++] = super;
	else
		table[i] = super;

	/* update/extend the merged list of imsm_disk records */
	for (j = 0; j < mpb->num_disks; j++) {
		struct imsm_disk *disk = __get_imsm_disk(mpb, j);
		struct intel_disk *idisk;

		idisk = disk_list_get(disk->serial, *disk_list);
		if (idisk) {
			idisk->disk.status |= disk->status;
			/* a disk that is configured or failed anywhere
			 * cannot be treated as a spare */
			if (is_configured(&idisk->disk) ||
			    is_failed(&idisk->disk))
				idisk->disk.status &= ~(SPARE_DISK);
		} else {
			idisk = calloc(1, sizeof(*idisk));
			if (!idisk)
				return -1;
			idisk->owner = IMSM_UNKNOWN_OWNER;
			idisk->disk = *disk;
			idisk->next = *disk_list;
			*disk_list = idisk;
		}

		/* record which table entry this physical disk belongs to */
		if (serialcmp(idisk->disk.serial, d->serial) == 0)
			idisk->owner = i;
	}

	return tbl_size;
}
3395
3396 static struct intel_super *
3397 validate_members(struct intel_super *super, struct intel_disk *disk_list,
3398 const int owner)
3399 {
3400 struct imsm_super *mpb = super->anchor;
3401 int ok_count = 0;
3402 int i;
3403
3404 for (i = 0; i < mpb->num_disks; i++) {
3405 struct imsm_disk *disk = __get_imsm_disk(mpb, i);
3406 struct intel_disk *idisk;
3407
3408 idisk = disk_list_get(disk->serial, disk_list);
3409 if (idisk) {
3410 if (idisk->owner == owner ||
3411 idisk->owner == IMSM_UNKNOWN_OWNER)
3412 ok_count++;
3413 else
3414 dprintf("%s: '%.16s' owner %d != %d\n",
3415 __func__, disk->serial, idisk->owner,
3416 owner);
3417 } else {
3418 dprintf("%s: unknown disk %x [%d]: %.16s\n",
3419 __func__, __le32_to_cpu(mpb->family_num), i,
3420 disk->serial);
3421 break;
3422 }
3423 }
3424
3425 if (ok_count == mpb->num_disks)
3426 return super;
3427 return NULL;
3428 }
3429
3430 static void show_conflicts(__u32 family_num, struct intel_super *super_list)
3431 {
3432 struct intel_super *s;
3433
3434 for (s = super_list; s; s = s->next) {
3435 if (family_num != s->anchor->family_num)
3436 continue;
3437 fprintf(stderr, "Conflict, offlining family %#x on '%s'\n",
3438 __le32_to_cpu(family_num), s->disks->devname);
3439 }
3440 }
3441
/* imsm_thunderdome(): given the per-disk metadata views in *super_list
 * (length 'len'), elect one "champion" intel_super to represent the
 * container, migrate every disk's dl entry onto it, and unlink it from
 * *super_list (the caller frees what remains).  Returns the champion,
 * or NULL when no valid candidate survives.
 */
static struct intel_super *
imsm_thunderdome(struct intel_super **super_list, int len)
{
	struct intel_super *super_table[len];
	struct intel_disk *disk_list = NULL;
	struct intel_super *champion, *spare;
	struct intel_super *s, **del;
	int tbl_size = 0;
	int conflict;
	int i;

	/* phase 1: reduce the views to one candidate per metadata family */
	memset(super_table, 0, sizeof(super_table));
	for (s = *super_list; s; s = s->next)
		tbl_size = __prep_thunderdome(super_table, tbl_size, s, &disk_list);

	/* phase 2: drop candidates whose membership doesn't validate */
	for (i = 0; i < tbl_size; i++) {
		struct imsm_disk *d;
		struct intel_disk *idisk;
		struct imsm_super *mpb = super_table[i]->anchor;

		s = super_table[i];
		d = &s->disks->disk;

		/* 'd' must appear in merged disk list for its
		 * configuration to be valid
		 */
		idisk = disk_list_get(d->serial, disk_list);
		if (idisk && idisk->owner == i)
			s = validate_members(s, disk_list, i);
		else
			s = NULL;

		if (!s)
			dprintf("%s: marking family: %#x from %d:%d offline\n",
				__func__, mpb->family_num,
				super_table[i]->disks->major,
				super_table[i]->disks->minor);
		super_table[i] = s;
	}

	/* This is where the mdadm implementation differs from the Windows
	 * driver which has no strict concept of a container. We can only
	 * assemble one family from a container, so when returning a prodigal
	 * array member to this system the code will not be able to disambiguate
	 * the container contents that should be assembled ("foreign" versus
	 * "local"). It requires user intervention to set the orig_family_num
	 * to a new value to establish a new container. The Windows driver in
	 * this situation fixes up the volume name in place and manages the
	 * foreign array as an independent entity.
	 */
	s = NULL;
	spare = NULL;
	conflict = 0;
	/* phase 3: pick the first non-spare family; extra non-spare
	 * families are conflicts; a pure-spare entry is the fallback */
	for (i = 0; i < tbl_size; i++) {
		struct intel_super *tbl_ent = super_table[i];
		int is_spare = 0;

		if (!tbl_ent)
			continue;

		if (tbl_ent->anchor->num_raid_devs == 0) {
			spare = tbl_ent;
			is_spare = 1;
		}

		if (s && !is_spare) {
			show_conflicts(tbl_ent->anchor->family_num, *super_list);
			conflict++;
		} else if (!s && !is_spare)
			s = tbl_ent;
	}

	if (!s)
		s = spare;
	if (!s) {
		champion = NULL;
		goto out;
	}
	champion = s;

	if (conflict)
		fprintf(stderr, "Chose family %#x on '%s', "
			"assemble conflicts to new container with '--update=uuid'\n",
			__le32_to_cpu(s->anchor->family_num), s->disks->devname);

	/* collect all dl's onto 'champion', and update them to
	 * champion's version of the status
	 */
	for (s = *super_list; s; s = s->next) {
		struct imsm_super *mpb = champion->anchor;
		struct dl *dl = s->disks;

		if (s == champion)
			continue;

		/* find this disk in the champion's view and take over
		 * its status/index from there */
		for (i = 0; i < mpb->num_disks; i++) {
			struct imsm_disk *disk;

			disk = __serial_to_disk(dl->serial, mpb, &dl->index);
			if (disk) {
				dl->disk = *disk;
				/* only set index on disks that are a member of
				 * a populated contianer, i.e. one with
				 * raid_devs
				 */
				if (is_failed(&dl->disk))
					dl->index = -2;
				else if (is_spare(&dl->disk))
					dl->index = -1;
				break;
			}
		}

		/* unknown to the champion: classify via the merged disk
		 * list — clean unconfigured spare (-1) or cast out (-2) */
		if (i >= mpb->num_disks) {
			struct intel_disk *idisk;

			idisk = disk_list_get(dl->serial, disk_list);
			if (idisk && is_spare(&idisk->disk) &&
			    !is_failed(&idisk->disk) && !is_configured(&idisk->disk))
				dl->index = -1;
			else {
				dl->index = -2;
				continue;
			}
		}

		dl->next = champion->disks;
		champion->disks = dl;
		s->disks = NULL;
	}

	/* delete 'champion' from super_list */
	for (del = super_list; *del; ) {
		if (*del == champion) {
			*del = (*del)->next;
			break;
		} else
			del = &(*del)->next;
	}
	champion->next = NULL;

 out:
	/* the merged disk list was only needed for arbitration */
	while (disk_list) {
		struct intel_disk *idisk = disk_list;

		disk_list = disk_list->next;
		free(idisk);
	}

	return champion;
}
3593
3594 static int load_super_imsm_all(struct supertype *st, int fd, void **sbp,
3595 char *devname)
3596 {
3597 struct mdinfo *sra;
3598 struct intel_super *super_list = NULL;
3599 struct intel_super *super = NULL;
3600 int devnum = fd2devnum(fd);
3601 struct mdinfo *sd;
3602 int retry;
3603 int err = 0;
3604 int i;
3605
3606 /* check if 'fd' an opened container */
3607 sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
3608 if (!sra)
3609 return 1;
3610
3611 if (sra->array.major_version != -1 ||
3612 sra->array.minor_version != -2 ||
3613 strcmp(sra->text_version, "imsm") != 0) {
3614 err = 1;
3615 goto error;
3616 }
3617 /* load all mpbs */
3618 for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
3619 struct intel_super *s = alloc_super();
3620 char nm[32];
3621 int dfd;
3622 int rv;
3623
3624 err = 1;
3625 if (!s)
3626 goto error;
3627 s->next = super_list;
3628 super_list = s;
3629
3630 err = 2;
3631 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3632 dfd = dev_open(nm, O_RDWR);
3633 if (dfd < 0)
3634 goto error;
3635
3636 rv = find_intel_hba_capability(dfd, s, devname);
3637 /* no orom/efi or non-intel hba of the disk */
3638 if (rv != 0)
3639 goto error;
3640
3641 err = load_and_parse_mpb(dfd, s, NULL, 1);
3642
3643 /* retry the load if we might have raced against mdmon */
3644 if (err == 3 && mdmon_running(devnum))
3645 for (retry = 0; retry < 3; retry++) {
3646 usleep(3000);
3647 err = load_and_parse_mpb(dfd, s, NULL, 1);
3648 if (err != 3)
3649 break;
3650 }
3651 if (err)
3652 goto error;
3653 }
3654
3655 /* all mpbs enter, maybe one leaves */
3656 super = imsm_thunderdome(&super_list, i);
3657 if (!super) {
3658 err = 1;
3659 goto error;
3660 }
3661
3662 if (find_missing(super) != 0) {
3663 free_imsm(super);
3664 err = 2;
3665 goto error;
3666 }
3667
3668 /* load migration record */
3669 err = load_imsm_migr_rec(super, NULL);
3670 if (err) {
3671 err = 4;
3672 goto error;
3673 }
3674
3675 /* Check migration compatibility */
3676 if (check_mpb_migr_compatibility(super) != 0) {
3677 fprintf(stderr, Name ": Unsupported migration detected");
3678 if (devname)
3679 fprintf(stderr, " on %s\n", devname);
3680 else
3681 fprintf(stderr, " (IMSM).\n");
3682
3683 err = 5;
3684 goto error;
3685 }
3686
3687 err = 0;
3688
3689 error:
3690 while (super_list) {
3691 struct intel_super *s = super_list;
3692
3693 super_list = super_list->next;
3694 free_imsm(s);
3695 }
3696 sysfs_free(sra);
3697
3698 if (err)
3699 return err;
3700
3701 *sbp = super;
3702 st->container_dev = devnum;
3703 if (err == 0 && st->ss == NULL) {
3704 st->ss = &super_imsm;
3705 st->minor_version = 0;
3706 st->max_devs = IMSM_MAX_DEVICES;
3707 }
3708 return 0;
3709 }
3710
/* load_container_imsm(): supertype hook; load the whole container's
 * metadata into st->sb via load_super_imsm_all().
 */
static int load_container_imsm(struct supertype *st, int fd, char *devname)
{
	return load_super_imsm_all(st, fd, &st->sb, devname);
}
3715 #endif
3716
/* load_super_imsm(): load IMSM metadata from a single component device
 * 'fd' into a fresh intel_super stored in st->sb.
 *
 * Returns 0 on success; 1 on partition/allocation failure, 2 when the
 * platform capability check fails (and ignore_hw_compat is off), 3 for
 * unsupported migration, or the load_and_parse_mpb() error code.
 */
static int load_super_imsm(struct supertype *st, int fd, char *devname)
{
	struct intel_super *super;
	int rv;

	if (test_partition(fd))
		/* IMSM not allowed on partitions */
		return 1;

	/* drop any previously loaded metadata */
	free_super_imsm(st);

	super = alloc_super();
	if (!super) {
		fprintf(stderr,
			Name ": malloc of %zu failed.\n",
			sizeof(*super));
		return 1;
	}
	/* Load hba and capabilities if they exist.
	 * But do not preclude loading metadata in case capabilities or hba are
	 * non-compliant and ignore_hw_compat is set.
	 */
	rv = find_intel_hba_capability(fd, super, devname);
	/* no orom/efi or non-intel hba of the disk */
	if ((rv != 0) && (st->ignore_hw_compat == 0)) {
		if (devname)
			fprintf(stderr,
				Name ": No OROM/EFI properties for %s\n", devname);
		free_imsm(super);
		return 2;
	}
	rv = load_and_parse_mpb(fd, super, devname, 0);

	if (rv) {
		if (devname)
			fprintf(stderr,
				Name ": Failed to load all information "
				"sections on %s\n", devname);
		free_imsm(super);
		return rv;
	}

	st->sb = super;
	if (st->ss == NULL) {
		st->ss = &super_imsm;
		st->minor_version = 0;
		st->max_devs = IMSM_MAX_DEVICES;
	}

	/* load migration record */
	/* NOTE(review): return value deliberately ignored here, unlike in
	 * load_super_imsm_all() — presumably a missing record is tolerable
	 * for single-device loads; confirm before changing */
	load_imsm_migr_rec(super, NULL);

	/* Check for unsupported migration features */
	if (check_mpb_migr_compatibility(super) != 0) {
		fprintf(stderr, Name ": Unsupported migration detected");
		if (devname)
			fprintf(stderr, " on %s\n", devname);
		else
			fprintf(stderr, " (IMSM).\n");
		return 3;
	}

	return 0;
}
3781
3782 static __u16 info_to_blocks_per_strip(mdu_array_info_t *info)
3783 {
3784 if (info->level == 1)
3785 return 128;
3786 return info->chunk_size >> 9;
3787 }
3788
3789 static __u32 info_to_num_data_stripes(mdu_array_info_t *info, int num_domains)
3790 {
3791 __u32 num_stripes;
3792
3793 num_stripes = (info->size * 2) / info_to_blocks_per_strip(info);
3794 num_stripes /= num_domains;
3795
3796 return num_stripes;
3797 }
3798
3799 static __u32 info_to_blocks_per_member(mdu_array_info_t *info)
3800 {
3801 if (info->level == 1)
3802 return info->size * 2;
3803 else
3804 return (info->size * 2) & ~(info_to_blocks_per_strip(info) - 1);
3805 }
3806
/* imsm_update_version_info(): recompute the mpb signature version string
 * and attribute bits from the current set of raid devices.  The version
 * ladder must be walked from most- to least-demanding feature, so the
 * branch order below is significant.
 */
static void imsm_update_version_info(struct intel_super *super)
{
	/* update the version and attributes */
	struct imsm_super *mpb = super->anchor;
	char *version;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		/* any array larger than 2^32 sectors needs the 2TB attribute */
		if (__le32_to_cpu(dev->size_high) > 0)
			mpb->attributes |= MPB_ATTRIB_2TB;

		/* FIXME detect when an array spans a port multiplier */
#if 0
		mpb->attributes |= MPB_ATTRIB_PM;
#endif

		/* multiple volumes or any non-default attribute forces the
		 * attribute-capable metadata version */
		if (mpb->num_raid_devs > 1 ||
		    mpb->attributes != MPB_ATTRIB_CHECKSUM_VERIFY) {
			version = MPB_VERSION_ATTRIBS;
			switch (get_imsm_raid_level(map)) {
			case 0: mpb->attributes |= MPB_ATTRIB_RAID0; break;
			case 1: mpb->attributes |= MPB_ATTRIB_RAID1; break;
			case 10: mpb->attributes |= MPB_ATTRIB_RAID10; break;
			case 5: mpb->attributes |= MPB_ATTRIB_RAID5; break;
			}
		} else {
			/* single plain volume: pick the minimal legacy version
			 * that supports its disk count / raid level */
			if (map->num_members >= 5)
				version = MPB_VERSION_5OR6_DISK_ARRAY;
			else if (dev->status == DEV_CLONE_N_GO)
				version = MPB_VERSION_CNG;
			else if (get_imsm_raid_level(map) == 5)
				version = MPB_VERSION_RAID5;
			else if (map->num_members >= 3)
				version = MPB_VERSION_3OR4_DISK_ARRAY;
			else if (get_imsm_raid_level(map) == 1)
				version = MPB_VERSION_RAID1;
			else
				version = MPB_VERSION_RAID0;
		}
		/* overwrite only the version suffix after the fixed signature */
		strcpy(((char *) mpb->sig) + strlen(MPB_SIGNATURE), version);
	}
}
3853
3854 static int check_name(struct intel_super *super, char *name, int quiet)
3855 {
3856 struct imsm_super *mpb = super->anchor;
3857 char *reason = NULL;
3858 int i;
3859
3860 if (strlen(name) > MAX_RAID_SERIAL_LEN)
3861 reason = "must be 16 characters or less";
3862
3863 for (i = 0; i < mpb->num_raid_devs; i++) {
3864 struct imsm_dev *dev = get_imsm_dev(super, i);
3865
3866 if (strncmp((char *) dev->volume, name, MAX_RAID_SERIAL_LEN) == 0) {
3867 reason = "already exists";
3868 break;
3869 }
3870 }
3871
3872 if (reason && !quiet)
3873 fprintf(stderr, Name ": imsm volume name %s\n", reason);
3874
3875 return !reason;
3876 }
3877
/* init_super_imsm_volume(): create a new raid volume inside an existing
 * IMSM container (st->sb is already populated).  Grows the anchor mpb if
 * needed, appends a new imsm_dev describing the volume, and refreshes
 * the metadata version.  Returns 1 on success, 0 on failure.
 */
static int init_super_imsm_volume(struct supertype *st, mdu_array_info_t *info,
				  unsigned long long size, char *name,
				  char *homehost, int *uuid)
{
	/* We are creating a volume inside a pre-existing container.
	 * so st->sb is already set.
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct intel_dev *dv;
	struct imsm_dev *dev;
	struct imsm_vol *vol;
	struct imsm_map *map;
	int idx = mpb->num_raid_devs;
	int i;
	unsigned long long array_blocks;
	size_t size_old, size_new;
	__u32 num_data_stripes;

	/* enforce the OROM's volumes-per-array limit, when known */
	if (super->orom && mpb->num_raid_devs >= super->orom->vpa) {
		fprintf(stderr, Name": This imsm-container already has the "
			"maximum of %d volumes\n", super->orom->vpa);
		return 0;
	}

	/* ensure the mpb is large enough for the new data */
	size_old = __le32_to_cpu(mpb->mpb_size);
	size_new = disks_to_mpb_size(info->nr_disks);
	if (size_new > size_old) {
		void *mpb_new;
		size_t size_round = ROUND_UP(size_new, 512);

		if (posix_memalign(&mpb_new, 512, size_round) != 0) {
			fprintf(stderr, Name": could not allocate new mpb\n");
			return 0;
		}
		/* NOTE(review): migr_rec_buf is (re)allocated only on the
		 * grow path, and a pre-existing buffer would leak here —
		 * confirm whether that is intentional */
		if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
			fprintf(stderr, Name
				": %s could not allocate migr_rec buffer\n",
				__func__);
			/* NOTE(review): frees super while st->sb still points
			 * at it — callers must not reuse st->sb after failure */
			free(super->buf);
			free(super);
			return 0;
		}
		memcpy(mpb_new, mpb, size_old);
		free(mpb);
		mpb = mpb_new;
		super->anchor = mpb_new;
		mpb->mpb_size = __cpu_to_le32(size_new);
		memset(mpb_new + size_old, 0, size_round - size_old);
	}
	super->current_vol = idx;
	/* when creating the first raid device in this container set num_disks
	 * to zero, i.e. delete this spare and add raid member devices in
	 * add_to_super_imsm_volume()
	 */
	if (super->current_vol == 0)
		mpb->num_disks = 0;

	if (!check_name(super, name, 0))
		return 0;
	dv = malloc(sizeof(*dv));
	if (!dv) {
		fprintf(stderr, Name ": failed to allocate device list entry\n");
		return 0;
	}
	/* imsm_dev has one trailing ord-table slot built in, hence the -1 */
	dev = calloc(1, sizeof(*dev) + sizeof(__u32) * (info->raid_disks - 1));
	if (!dev) {
		free(dv);
		fprintf(stderr, Name": could not allocate raid device\n");
		return 0;
	}

	strncpy((char *) dev->volume, name, MAX_RAID_SERIAL_LEN);
	if (info->level == 1)
		array_blocks = info_to_blocks_per_member(info);
	else
		array_blocks = calc_array_size(info->level, info->raid_disks,
					       info->layout, info->chunk_size,
					       info->size*2);
	/* round array size down to closest MB */
	array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;

	dev->size_low = __cpu_to_le32((__u32) array_blocks);
	dev->size_high = __cpu_to_le32((__u32) (array_blocks >> 32));
	dev->status = (DEV_READ_COALESCING | DEV_WRITE_COALESCING);
	vol = &dev->vol;
	vol->migr_state = 0;
	set_migr_type(dev, MIGR_INIT);
	vol->dirty = 0;
	vol->curr_migr_unit = 0;
	map = get_imsm_map(dev, 0);
	map->pba_of_lba0 = __cpu_to_le32(super->create_offset);
	map->blocks_per_member = __cpu_to_le32(info_to_blocks_per_member(info));
	map->blocks_per_strip = __cpu_to_le16(info_to_blocks_per_strip(info));
	map->failed_disk_num = ~0;
	map->map_state = info->level ? IMSM_T_STATE_UNINITIALIZED :
				       IMSM_T_STATE_NORMAL;
	map->ddf = 1;

	if (info->level == 1 && info->raid_disks > 2) {
		free(dev);
		free(dv);
		fprintf(stderr, Name": imsm does not support more than 2 disks"
				"in a raid1 volume\n");
		return 0;
	}

	/* IMSM expresses RAID10 as RAID1 across multiple mirror domains */
	map->raid_level = info->level;
	if (info->level == 10) {
		map->raid_level = 1;
		map->num_domains = info->raid_disks / 2;
	} else if (info->level == 1)
		map->num_domains = info->raid_disks;
	else
		map->num_domains = 1;

	num_data_stripes = info_to_num_data_stripes(info, map->num_domains);
	map->num_data_stripes = __cpu_to_le32(num_data_stripes);

	map->num_members = info->raid_disks;
	for (i = 0; i < map->num_members; i++) {
		/* initialized in add_to_super */
		set_imsm_ord_tbl_ent(map, i, IMSM_ORD_REBUILD);
	}
	mpb->num_raid_devs++;

	/* chain the new volume onto the container's device list */
	dv->dev = dev;
	dv->index = super->current_vol;
	dv->next = super->devlist;
	super->devlist = dv;

	imsm_update_version_info(super);

	return 1;
}
4014
/* init_super_imsm(): supertype hook for creating new metadata.  With a
 * pre-existing container (st->sb set) this delegates to
 * init_super_imsm_volume(); otherwise it allocates a fresh, empty anchor
 * (or a minimal one when info == NULL, used for zeroing superblocks).
 * Returns 1 on success, 0 otherwise.
 */
static int init_super_imsm(struct supertype *st, mdu_array_info_t *info,
			   unsigned long long size, char *name,
			   char *homehost, int *uuid)
{
	/* This is primarily called by Create when creating a new array.
	 * We will then get add_to_super called for each component, and then
	 * write_init_super called to write it out to each device.
	 * For IMSM, Create can create on fresh devices or on a pre-existing
	 * array.
	 * To create on a pre-existing array a different method will be called.
	 * This one is just for fresh drives.
	 */
	struct intel_super *super;
	struct imsm_super *mpb;
	size_t mpb_size;
	char *version;

	if (st->sb)
		return init_super_imsm_volume(st, info, size, name, homehost, uuid);

	if (info)
		mpb_size = disks_to_mpb_size(info->nr_disks);
	else
		mpb_size = 512;	/* one sector is enough for zeroing */

	super = alloc_super();
	if (super && posix_memalign(&super->buf, 512, mpb_size) != 0) {
		free(super);
		super = NULL;
	}
	if (!super) {
		fprintf(stderr, Name
			": %s could not allocate superblock\n", __func__);
		return 0;
	}
	/* 512-byte aligned scratch buffer for the migration record */
	if (posix_memalign(&super->migr_rec_buf, 512, 512) != 0) {
		fprintf(stderr, Name
			": %s could not allocate migr_rec buffer\n", __func__);
		free(super->buf);
		free(super);
		return 0;
	}
	memset(super->buf, 0, mpb_size);
	mpb = super->buf;
	mpb->mpb_size = __cpu_to_le32(mpb_size);
	st->sb = super;

	if (info == NULL) {
		/* zeroing superblock */
		return 0;
	}

	mpb->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;

	/* sig is the fixed signature immediately followed by the version */
	version = (char *) mpb->sig;
	strcpy(version, MPB_SIGNATURE);
	version += strlen(MPB_SIGNATURE);
	strcpy(version, MPB_VERSION_RAID0);

	return 1;
}
4076
#ifndef MDASSEMBLE
/* add_to_super_imsm_volume(): attach one member disk ('dk') to the
 * volume currently being created (super->current_vol).  The disk must
 * already be known to the container.  Returns 0 on success, 1 on error.
 */
static int add_to_super_imsm_volume(struct supertype *st, mdu_disk_info_t *dk,
				     int fd, char *devname)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int slot;

	dev = get_imsm_dev(super, super->current_vol);
	map = get_imsm_map(dev, 0);

	/* only in-sync members may be added to a new imsm volume */
	if (! (dk->state & (1<<MD_DISK_SYNC))) {
		fprintf(stderr, Name ": %s: Cannot add spare devices to IMSM volume\n",
			devname);
		return 1;
	}

	if (fd == -1) {
		/* we're doing autolayout so grab the pre-marked (in
		 * validate_geometry) raid_disk
		 */
		for (dl = super->disks; dl; dl = dl->next)
			if (dl->raiddisk == dk->raid_disk)
				break;
	} else {
		/* explicit device: match on major:minor */
		for (dl = super->disks; dl ; dl = dl->next)
			if (dl->major == dk->major &&
			    dl->minor == dk->minor)
				break;
	}

	if (!dl) {
		fprintf(stderr, Name ": %s is not a member of the same container\n", devname);
		return 1;
	}

	/* add a pristine spare to the metadata */
	if (dl->index < 0) {
		dl->index = super->anchor->num_disks;
		super->anchor->num_disks++;
	}
	/* Check the device has not already been added */
	slot = get_imsm_disk_slot(map, dl->index);
	if (slot >= 0 &&
	    (get_imsm_ord_tbl_ent(dev, slot, -1) & IMSM_ORD_REBUILD) == 0) {
		fprintf(stderr, Name ": %s has been included in this array twice\n",
			devname);
		return 1;
	}
	set_imsm_ord_tbl_ent(map, dk->number, dl->index);
	dl->disk.status = CONFIGURED_DISK;

	/* if we are creating the first raid device update the family number */
	if (super->current_vol == 0) {
		__u32 sum;
		struct imsm_dev *_dev = __get_imsm_dev(mpb, 0);
		struct imsm_disk *_disk = __get_imsm_disk(mpb, dl->index);

		if (!_dev || !_disk) {
			fprintf(stderr, Name ": BUG mpb setup error\n");
			return 1;
		}
		*_dev = *dev;
		*_disk = dl->disk;
		/* randomized so re-created containers get distinct families */
		sum = random32();
		sum += __gen_imsm_checksum(mpb);
		mpb->family_num = __cpu_to_le32(sum);
		mpb->orig_family_num = mpb->family_num;
	}

	return 0;
}
4152
4153
4154 static int add_to_super_imsm(struct supertype *st, mdu_disk_info_t *dk,
4155 int fd, char *devname)
4156 {
4157 struct intel_super *super = st->sb;
4158 struct dl *dd;
4159 unsigned long long size;
4160 __u32 id;
4161 int rv;
4162 struct stat stb;
4163
4164 /* If we are on an RAID enabled platform check that the disk is
4165 * attached to the raid controller.
4166 * We do not need to test disks attachment for container based additions,
4167 * they shall be already tested when container was created/assembled.
4168 */
4169 rv = find_intel_hba_capability(fd, super, devname);
4170 /* no orom/efi or non-intel hba of the disk */
4171 if (rv != 0) {
4172 dprintf("capability: %p fd: %d ret: %d\n",
4173 super->orom, fd, rv);
4174 return 1;
4175 }
4176
4177 if (super->current_vol >= 0)
4178 return add_to_super_imsm_volume(st, dk, fd, devname);
4179
4180 fstat(fd, &stb);
4181 dd = malloc(sizeof(*dd));
4182 if (!dd) {
4183 fprintf(stderr,
4184 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
4185 return 1;
4186 }
4187 memset(dd, 0, sizeof(*dd));
4188 dd->major = major(stb.st_rdev);
4189 dd->minor = minor(stb.st_rdev);
4190 dd->index = -1;
4191 dd->devname = devname ? strdup(devname) : NULL;
4192 dd->fd = fd;
4193 dd->e = NULL;
4194 dd->action = DISK_ADD;
4195 rv = imsm_read_serial(fd, devname, dd->serial);
4196 if (rv) {
4197 fprintf(stderr,
4198 Name ": failed to retrieve scsi serial, aborting\n");
4199 free(dd);
4200 abort();
4201 }
4202
4203 get_dev_size(fd, NULL, &size);
4204 size /= 512;
4205 serialcpy(dd->disk.serial, dd->serial);
4206 dd->disk.total_blocks = __cpu_to_le32(size);
4207 dd->disk.status = SPARE_DISK;
4208 if (sysfs_disk_to_scsi_id(fd, &id) == 0)
4209 dd->disk.scsi_id = __cpu_to_le32(id);
4210 else
4211 dd->disk.scsi_id = __cpu_to_le32(0);
4212
4213 if (st->update_tail) {
4214 dd->next = super->disk_mgmt_list;
4215 super->disk_mgmt_list = dd;
4216 } else {
4217 dd->next = super->disks;
4218 super->disks = dd;
4219 super->updates_pending++;
4220 }
4221
4222 return 0;
4223 }
4224
4225
4226 static int remove_from_super_imsm(struct supertype *st, mdu_disk_info_t *dk)
4227 {
4228 struct intel_super *super = st->sb;
4229 struct dl *dd;
4230
4231 /* remove from super works only in mdmon - for communication
4232 * manager - monitor. Check if communication memory buffer
4233 * is prepared.
4234 */
4235 if (!st->update_tail) {
4236 fprintf(stderr,
4237 Name ": %s shall be used in mdmon context only"
4238 "(line %d).\n", __func__, __LINE__);
4239 return 1;
4240 }
4241 dd = malloc(sizeof(*dd));
4242 if (!dd) {
4243 fprintf(stderr,
4244 Name ": malloc failed %s:%d.\n", __func__, __LINE__);
4245 return 1;
4246 }
4247 memset(dd, 0, sizeof(*dd));
4248 dd->major = dk->major;
4249 dd->minor = dk->minor;
4250 dd->index = -1;
4251 dd->fd = -1;
4252 dd->disk.status = SPARE_DISK;
4253 dd->action = DISK_REMOVE;
4254
4255 dd->next = super->disk_mgmt_list;
4256 super->disk_mgmt_list = dd;
4257
4258
4259 return 0;
4260 }
4261
4262 static int store_imsm_mpb(int fd, struct imsm_super *mpb);
4263
/* Scratch anchor used by write_super_imsm_spares() to publish a minimal
 * single-disk "spare" mpb; 512-byte aligned for direct-I/O writes. */
static union {
	char buf[512];
	struct imsm_super anchor;
} spare_record __attribute__ ((aligned(512)));
4268
4269 /* spare records have their own family number and do not have any defined raid
4270 * devices
4271 */
4272 static int write_super_imsm_spares(struct intel_super *super, int doclose)
4273 {
4274 struct imsm_super *mpb = super->anchor;
4275 struct imsm_super *spare = &spare_record.anchor;
4276 __u32 sum;
4277 struct dl *d;
4278
4279 spare->mpb_size = __cpu_to_le32(sizeof(struct imsm_super)),
4280 spare->generation_num = __cpu_to_le32(1UL),
4281 spare->attributes = MPB_ATTRIB_CHECKSUM_VERIFY;
4282 spare->num_disks = 1,
4283 spare->num_raid_devs = 0,
4284 spare->cache_size = mpb->cache_size,
4285 spare->pwr_cycle_count = __cpu_to_le32(1),
4286
4287 snprintf((char *) spare->sig, MAX_SIGNATURE_LENGTH,
4288 MPB_SIGNATURE MPB_VERSION_RAID0);
4289
4290 for (d = super->disks; d; d = d->next) {
4291 if (d->index != -1)
4292 continue;
4293
4294 spare->disk[0] = d->disk;
4295 sum = __gen_imsm_checksum(spare);
4296 spare->family_num = __cpu_to_le32(sum);
4297 spare->orig_family_num = 0;
4298 sum = __gen_imsm_checksum(spare);
4299 spare->check_sum = __cpu_to_le32(sum);
4300
4301 if (store_imsm_mpb(d->fd, spare)) {
4302 fprintf(stderr, "%s: failed for device %d:%d %s\n",
4303 __func__, d->major, d->minor, strerror(errno));
4304 return 1;
4305 }
4306 if (doclose) {
4307 close(d->fd);
4308 d->fd = -1;
4309 }
4310 }
4311
4312 return 0;
4313 }
4314
4315 static int is_gen_migration(struct imsm_dev *dev);
4316
/* write_super_imsm(): serialize the current in-memory metadata to every
 * member disk.  Bumps the generation number, rebuilds the disk/device
 * tables and checksum, optionally clears the on-disk migration record,
 * and finally writes spare records if any spares exist.  Returns 0 on
 * success (per-disk store failures are only reported, not propagated).
 */
static int write_super_imsm(struct supertype *st, int doclose)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *d;
	__u32 generation;
	__u32 sum;
	int spares = 0;
	int i;
	/* start from the header size without its embedded first disk slot */
	__u32 mpb_size = sizeof(struct imsm_super) - sizeof(struct imsm_disk);
	int num_disks = 0;
	int clear_migration_record = 1;

	/* 'generation' is incremented everytime the metadata is written */
	generation = __le32_to_cpu(mpb->generation_num);
	generation++;
	mpb->generation_num = __cpu_to_le32(generation);

	/* fix up cases where previous mdadm releases failed to set
	 * orig_family_num
	 */
	if (mpb->orig_family_num == 0)
		mpb->orig_family_num = mpb->family_num;

	/* rebuild the disk table from attached and missing disks */
	for (d = super->disks; d; d = d->next) {
		if (d->index == -1)
			spares++;
		else {
			mpb->disk[d->index] = d->disk;
			num_disks++;
		}
	}
	for (d = super->missing; d; d = d->next) {
		mpb->disk[d->index] = d->disk;
		num_disks++;
	}
	mpb->num_disks = num_disks;
	mpb_size += sizeof(struct imsm_disk) * mpb->num_disks;

	/* copy each raid device definition into the anchor; keep the
	 * migration record when any device is mid-migration */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
		struct imsm_dev *dev2 = get_imsm_dev(super, i);
		if (dev && dev2) {
			imsm_copy_dev(dev, dev2);
			mpb_size += sizeof_imsm_dev(dev, 0);
		}
		if (is_gen_migration(dev2))
			clear_migration_record = 0;
	}
	mpb_size += __le32_to_cpu(mpb->bbm_log_size);
	mpb->mpb_size = __cpu_to_le32(mpb_size);

	/* recalculate checksum */
	sum = __gen_imsm_checksum(mpb);
	mpb->check_sum = __cpu_to_le32(sum);

	if (clear_migration_record)
		memset(super->migr_rec_buf, 0, 512);

	/* write the mpb for disks that compose raid devices */
	for (d = super->disks; d ; d = d->next) {
		if (d->index < 0)
			continue;	/* spares handled separately below */
		if (store_imsm_mpb(d->fd, mpb))
			fprintf(stderr, "%s: failed for device %d:%d %s\n",
				__func__, d->major, d->minor, strerror(errno));
		if (clear_migration_record) {
			unsigned long long dsize;

			/* the migration record lives in the last sector */
			get_dev_size(d->fd, NULL, &dsize);
			if (lseek64(d->fd, dsize - 512, SEEK_SET) >= 0) {
				/* NOTE(review): write() result is ignored —
				 * a short/failed clear goes unreported */
				write(d->fd, super->migr_rec_buf, 512);
			}
		}
		if (doclose) {
			close(d->fd);
			d->fd = -1;
		}
	}

	if (spares)
		return write_super_imsm_spares(super, doclose);

	return 0;
}
4402
4403
/* Queue an 'update_create_array' metadata update for volume @dev_idx.
 * The update buffer carries a copy of the imsm_dev followed by one
 * disk_info (serial number) per member so mdmon can match the disks
 * by serial when applying the update.
 * Returns 0 on success, 1 on allocation failure.
 */
static int create_array(struct supertype *st, int dev_idx)
{
	size_t len;
	struct imsm_update_create_array *u;
	struct intel_super *super = st->sb;
	struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
	struct imsm_map *map = get_imsm_map(dev, 0);
	struct disk_info *inf;
	struct imsm_disk *disk;
	int i;

	/* u->dev is a variable-length tail: drop the placeholder size and
	 * add the real imsm_dev size plus the trailing disk_info table */
	len = sizeof(*u) - sizeof(*dev) + sizeof_imsm_dev(dev, 0) +
	      sizeof(*inf) * map->num_members;
	u = malloc(len);
	if (!u) {
		fprintf(stderr, "%s: failed to allocate update buffer\n",
			__func__);
		return 1;
	}

	u->type = update_create_array;
	u->dev_idx = dev_idx;
	imsm_copy_dev(&u->dev, dev);
	inf = get_disk_info(u);
	for (i = 0; i < map->num_members; i++) {
		int idx = get_imsm_disk_idx(dev, i, -1);

		/* NOTE(review): get_imsm_disk() can return NULL for an
		 * unknown index - presumably all members are present at
		 * create time; confirm against callers */
		disk = get_imsm_disk(super, idx);
		serialcpy(inf[i].serial, disk->serial);
	}
	append_metadata_update(st, u, len);

	return 0;
}
4438
4439 static int mgmt_disk(struct supertype *st)
4440 {
4441 struct intel_super *super = st->sb;
4442 size_t len;
4443 struct imsm_update_add_remove_disk *u;
4444
4445 if (!super->disk_mgmt_list)
4446 return 0;
4447
4448 len = sizeof(*u);
4449 u = malloc(len);
4450 if (!u) {
4451 fprintf(stderr, "%s: failed to allocate update buffer\n",
4452 __func__);
4453 return 1;
4454 }
4455
4456 u->type = update_add_remove_disk;
4457 append_metadata_update(st, u, len);
4458
4459 return 0;
4460 }
4461
4462 static int write_init_super_imsm(struct supertype *st)
4463 {
4464 struct intel_super *super = st->sb;
4465 int current_vol = super->current_vol;
4466
4467 /* we are done with current_vol reset it to point st at the container */
4468 super->current_vol = -1;
4469
4470 if (st->update_tail) {
4471 /* queue the recently created array / added disk
4472 * as a metadata update */
4473 int rv;
4474
4475 /* determine if we are creating a volume or adding a disk */
4476 if (current_vol < 0) {
4477 /* in the mgmt (add/remove) disk case we are running
4478 * in mdmon context, so don't close fd's
4479 */
4480 return mgmt_disk(st);
4481 } else
4482 rv = create_array(st, current_vol);
4483
4484 return rv;
4485 } else {
4486 struct dl *d;
4487 for (d = super->disks; d; d = d->next)
4488 Kill(d->devname, NULL, 0, 1, 1);
4489 return write_super_imsm(st, 1);
4490 }
4491 }
4492 #endif
4493
4494 static int store_super_imsm(struct supertype *st, int fd)
4495 {
4496 struct intel_super *super = st->sb;
4497 struct imsm_super *mpb = super ? super->anchor : NULL;
4498
4499 if (!mpb)
4500 return 1;
4501
4502 #ifndef MDASSEMBLE
4503 return store_imsm_mpb(fd, mpb);
4504 #else
4505 return 1;
4506 #endif
4507 }
4508
4509 static int imsm_bbm_log_size(struct imsm_super *mpb)
4510 {
4511 return __le32_to_cpu(mpb->bbm_log_size);
4512 }
4513
4514 #ifndef MDASSEMBLE
/* Validate that @dev can seed a new imsm container.  Opens the device
 * exclusively, verifies Intel HBA/OROM attachment and the platform's
 * total-disk limit, and reports the usable size in sectors via
 * *freesize.  Returns 1 when the device is acceptable, 0 otherwise
 * (and 1 early when no device name is supplied).
 * layout/chunk/size are unused here; they exist for the generic
 * validate_geometry signature.
 */
static int validate_geometry_imsm_container(struct supertype *st, int level,
					    int layout, int raiddisks, int chunk,
					    unsigned long long size, char *dev,
					    unsigned long long *freesize,
					    int verbose)
{
	int fd;
	unsigned long long ldsize;
	struct intel_super *super=NULL;
	int rv = 0;

	if (level != LEVEL_CONTAINER)
		return 0;
	if (!dev)
		return 1;

	/* O_EXCL: refuse devices that are already in use */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd < 0) {
		if (verbose)
			fprintf(stderr, Name ": imsm: Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	if (!get_dev_size(fd, dev, &ldsize)) {
		close(fd);
		return 0;
	}

	/* capabilities retrieve could be possible
	 * note that there is no fd for the disks in array.
	 */
	super = alloc_super();
	if (!super) {
		fprintf(stderr,
			Name ": malloc of %zu failed.\n",
			sizeof(*super));
		close(fd);
		return 0;
	}

	rv = find_intel_hba_capability(fd, super, verbose ? dev : NULL);
	if (rv != 0) {
#if DEBUG
		char str[256];
		fd2devname(fd, str);
		dprintf("validate_geometry_imsm_container: fd: %d %s orom: %p rv: %d raiddisk: %d\n",
			fd, str, super->orom, rv, raiddisks);
#endif
		/* no orom/efi or non-intel hba of the disk */
		close(fd);
		free_imsm(super);
		return 0;
	}
	close(fd);
	/* enforce the option-rom's maximum total disks when present */
	if (super->orom && raiddisks > super->orom->tds) {
		if (verbose)
			fprintf(stderr, Name ": %d exceeds maximum number of"
				" platform supported disks: %d\n",
				raiddisks, super->orom->tds);

		free_imsm(super);
		return 0;
	}

	/* report the device size (converted to 512-byte sectors) minus
	 * metadata reservations */
	*freesize = avail_size_imsm(st, ldsize >> 9);
	free_imsm(super);

	return 1;
}
4584
4585 static unsigned long long find_size(struct extent *e, int *idx, int num_extents)
4586 {
4587 const unsigned long long base_start = e[*idx].start;
4588 unsigned long long end = base_start + e[*idx].size;
4589 int i;
4590
4591 if (base_start == end)
4592 return 0;
4593
4594 *idx = *idx + 1;
4595 for (i = *idx; i < num_extents; i++) {
4596 /* extend overlapping extents */
4597 if (e[i].start >= base_start &&
4598 e[i].start <= end) {
4599 if (e[i].size == 0)
4600 return 0;
4601 if (e[i].start + e[i].size > end)
4602 end = e[i].start + e[i].size;
4603 } else if (e[i].start > end) {
4604 *idx = i;
4605 break;
4606 }
4607 }
4608
4609 return end - base_start;
4610 }
4611
/* Combine the free-space maps of all member disks and compute the
 * largest volume that can be created at a common start offset.
 * Records that offset in super->create_offset and returns the usable
 * size in sectors, or 0 on failure.
 */
static unsigned long long merge_extents(struct intel_super *super, int sum_extents)
{
	/* build a composite disk with all known extents and generate a new
	 * 'maxsize' given the "all disks in an array must share a common start
	 * offset" constraint
	 */
	struct extent *e = calloc(sum_extents, sizeof(*e));
	struct dl *dl;
	int i, j;
	int start_extent;
	unsigned long long pos;
	unsigned long long start = 0;
	unsigned long long maxsize;
	unsigned long reserve;

	if (!e)
		return 0;

	/* coalesce and sort all extents. also, check to see if we need to
	 * reserve space between member arrays
	 */
	j = 0;
	for (dl = super->disks; dl; dl = dl->next) {
		if (!dl->e)
			continue;
		for (i = 0; i < dl->extent_cnt; i++)
			e[j++] = dl->e[i];
	}
	qsort(e, sum_extents, sizeof(*e), cmp_extent);

	/* merge extents (find_size advances i past each merged run; a
	 * zero-size result terminates the compacted list) */
	i = 0;
	j = 0;
	while (i < sum_extents) {
		e[j].start = e[i].start;
		e[j].size = find_size(e, &i, sum_extents);
		j++;
		if (e[j-1].size == 0)
			break;
	}

	/* scan the gaps between merged used-extents for the largest free
	 * region; 'start'/'start_extent' remember where it begins */
	pos = 0;
	maxsize = 0;
	start_extent = 0;
	i = 0;
	do {
		unsigned long long esize;

		esize = e[i].start - pos;
		if (esize >= maxsize) {
			maxsize = esize;
			start = pos;
			start_extent = i;
		}
		pos = e[i].start + e[i].size;
		i++;
	} while (e[i-1].size);
	free(e);

	if (maxsize == 0)
		return 0;

	/* FIXME assumes volume at offset 0 is the first volume in a
	 * container
	 */
	if (start_extent > 0)
		reserve = IMSM_RESERVED_SECTORS; /* gap between raid regions */
	else
		reserve = 0;

	if (maxsize < reserve)
		return 0;

	/* record the common start offset, guarding against overflow of
	 * the 32-bit create_offset field */
	super->create_offset = ~((__u32) 0);
	if (start + reserve > super->create_offset)
		return 0; /* start overflows create_offset */
	super->create_offset = start + reserve;

	return maxsize - reserve;
}
4692
/* Decide whether a @level array with @raiddisks members can be created,
 * honoring option-rom capability bits when @orom is present.
 * raid4 and raid6 are never supported by imsm metadata.
 */
static int is_raid_level_supported(const struct imsm_orom *orom, int level, int raiddisks)
{
	if (level < 0 || level == 4 || level == 6)
		return 0;

	if (!orom)
		return 1; /* not on an Intel RAID platform so anything goes */

	/* if we have an orom prevent invalid raid levels */
	switch (level) {
	case 0:
		return imsm_orom_has_raid0(orom);
	case 1:
		/* more than two members is imsm's raid1e variant */
		return raiddisks > 2
			? imsm_orom_has_raid1e(orom)
			: (imsm_orom_has_raid1(orom) && raiddisks == 2);
	case 5:
		return imsm_orom_has_raid5(orom) && raiddisks > 2;
	case 10:
		return imsm_orom_has_raid10(orom) && raiddisks == 4;
	default:
		return 0;
	}
}
4714
4715
4716 #define pr_vrb(fmt, arg...) (void) (verbose && fprintf(stderr, Name fmt, ##arg))
/*
 * validate volume parameters with OROM/EFI capabilities
 *
 * Checks, in order: the platform's total-disk limit, raid level
 * support, chunk-size support (defaulting *chunk when unset/zero),
 * and that the requested layout is the single layout imsm supports
 * for the level.  Returns 1 when every constraint passes, 0 otherwise.
 */
static int
validate_geometry_imsm_orom(struct intel_super *super, int level, int layout,
			    int raiddisks, int *chunk, int verbose)
{
#if DEBUG
	verbose = 1;
#endif
	/* validate container capabilities */
	if (super->orom && raiddisks > super->orom->tds) {
		if (verbose)
			fprintf(stderr, Name ": %d exceeds maximum number of"
				" platform supported disks: %d\n",
				raiddisks, super->orom->tds);
		return 0;
	}

	/* capabilities of OROM tested - copied from validate_geometry_imsm_volume */
	if (super->orom && (!is_raid_level_supported(super->orom, level,
						     raiddisks))) {
		pr_vrb(": platform does not support raid%d with %d disk%s\n",
		       level, raiddisks, raiddisks > 1 ? "s" : "");
		return 0;
	}
	/* raid1 has no configurable chunk; only validate for other levels */
	if (super->orom && level != 1) {
		if (chunk && (*chunk == 0 || *chunk == UnSet))
			*chunk = imsm_orom_default_chunk(super->orom);
		else if (chunk && !imsm_orom_has_chunk(super->orom, *chunk)) {
			pr_vrb(": platform does not support a chunk size of: "
			       "%d\n", *chunk);
			return 0;
		}
	}
	if (layout != imsm_level_to_layout(level)) {
		if (level == 5)
			pr_vrb(": imsm raid 5 only supports the left-asymmetric layout\n");
		else if (level == 10)
			pr_vrb(": imsm raid 10 only supports the n2 layout\n");
		else
			pr_vrb(": imsm unknown layout %#x for this raid level %d\n",
			       layout, level);
		return 0;
	}
	return 1;
}
4764
4765 /* validate_geometry_imsm_volume - lifted from validate_geometry_ddf_bvd
4766 * FIX ME add ahci details
4767 */
4768 static int validate_geometry_imsm_volume(struct supertype *st, int level,
4769 int layout, int raiddisks, int *chunk,
4770 unsigned long long size, char *dev,
4771 unsigned long long *freesize,
4772 int verbose)
4773 {
4774 struct stat stb;
4775 struct intel_super *super = st->sb;
4776 struct imsm_super *mpb = super->anchor;
4777 struct dl *dl;
4778 unsigned long long pos = 0;
4779 unsigned long long maxsize;
4780 struct extent *e;
4781 int i;
4782
4783 /* We must have the container info already read in. */
4784 if (!super)
4785 return 0;
4786
4787 if (!validate_geometry_imsm_orom(super, level, layout, raiddisks, chunk, verbose)) {
4788 fprintf(stderr, Name ": RAID gemetry validation failed. "
4789 "Cannot proceed with the action(s).\n");
4790 return 0;
4791 }
4792 if (!dev) {
4793 /* General test: make sure there is space for
4794 * 'raiddisks' device extents of size 'size' at a given
4795 * offset
4796 */
4797 unsigned long long minsize = size;
4798 unsigned long long start_offset = MaxSector;
4799 int dcnt = 0;
4800 if (minsize == 0)
4801 minsize = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
4802 for (dl = super->disks; dl ; dl = dl->next) {
4803 int found = 0;
4804
4805 pos = 0;
4806 i = 0;
4807 e = get_extents(super, dl);
4808 if (!e) continue;
4809 do {
4810 unsigned long long esize;
4811 esize = e[i].start - pos;
4812 if (esize >= minsize)
4813 found = 1;
4814 if (found && start_offset == MaxSector) {
4815 start_offset = pos;
4816 break;
4817 } else if (found && pos != start_offset) {
4818 found = 0;
4819 break;
4820 }
4821 pos = e[i].start + e[i].size;
4822 i++;
4823 } while (e[i-1].size);
4824 if (found)
4825 dcnt++;
4826 free(e);
4827 }
4828 if (dcnt < raiddisks) {
4829 if (verbose)
4830 fprintf(stderr, Name ": imsm: Not enough "
4831 "devices with space for this array "
4832 "(%d < %d)\n",
4833 dcnt, raiddisks);
4834 return 0;
4835 }
4836 return 1;
4837 }
4838
4839 /* This device must be a member of the set */
4840 if (stat(dev, &stb) < 0)
4841 return 0;
4842 if ((S_IFMT & stb.st_mode) != S_IFBLK)
4843 return 0;
4844 for (dl = super->disks ; dl ; dl = dl->next) {
4845 if (dl->major == (int)major(stb.st_rdev) &&
4846 dl->minor == (int)minor(stb.st_rdev))
4847 break;
4848 }
4849 if (!dl) {
4850 if (verbose)
4851 fprintf(stderr, Name ": %s is not in the "
4852 "same imsm set\n", dev);
4853 return 0;
4854 } else if (super->orom && dl->index < 0 && mpb->num_raid_devs) {
4855 /* If a volume is present then the current creation attempt
4856 * cannot incorporate new spares because the orom may not
4857 * understand this configuration (all member disks must be
4858 * members of each array in the container).
4859 */
4860 fprintf(stderr, Name ": %s is a spare and a volume"
4861 " is already defined for this container\n", dev);
4862 fprintf(stderr, Name ": The option-rom requires all member"
4863 " disks to be a member of all volumes\n");
4864 return 0;
4865 }
4866
4867 /* retrieve the largest free space block */
4868 e = get_extents(super, dl);
4869 maxsize = 0;
4870 i = 0;
4871 if (e) {
4872 do {
4873 unsigned long long esize;
4874
4875 esize = e[i].start - pos;
4876 if (esize >= maxsize)
4877 maxsize = esize;
4878 pos = e[i].start + e[i].size;
4879 i++;
4880 } while (e[i-1].size);
4881 dl->e = e;
4882 dl->extent_cnt = i;
4883 } else {
4884 if (verbose)
4885 fprintf(stderr, Name ": unable to determine free space for: %s\n",
4886 dev);
4887 return 0;
4888 }
4889 if (maxsize < size) {
4890 if (verbose)
4891 fprintf(stderr, Name ": %s not enough space (%llu < %llu)\n",
4892 dev, maxsize, size);
4893 return 0;
4894 }
4895
4896 /* count total number of extents for merge */
4897 i = 0;
4898 for (dl = super->disks; dl; dl = dl->next)
4899 if (dl->e)
4900 i += dl->extent_cnt;
4901
4902 maxsize = merge_extents(super, i);
4903 if (maxsize < size || maxsize == 0) {
4904 if (verbose)
4905 fprintf(stderr, Name ": not enough space after merge (%llu < %llu)\n",
4906 maxsize, size);
4907 return 0;
4908 }
4909
4910 *freesize = maxsize;
4911
4912 return 1;
4913 }
4914
/* Autolayout helper: find the largest free region common to all
 * candidate disks and record the chosen layout (dl->raiddisk slots and
 * *freesize) for a volume of 'size' sectors.  size == 0 means "use all
 * free space", rounded down to whole 'chunk'-KiB stripes.
 * Returns 1 on success, 0 when the disks cannot satisfy the request.
 */
static int reserve_space(struct supertype *st, int raiddisks,
			 unsigned long long size, int chunk,
			 unsigned long long *freesize)
{
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	struct dl *dl;
	int i;
	int extent_cnt;
	struct extent *e;
	unsigned long long maxsize;
	unsigned long long minsize;
	int cnt;
	int used;

	/* find the largest common start free region of the possible disks */
	used = 0;
	extent_cnt = 0;
	cnt = 0;
	for (dl = super->disks; dl; dl = dl->next) {
		dl->raiddisk = -1;

		if (dl->index >= 0)
			used++;

		/* don't activate new spares if we are orom constrained
		 * and there is already a volume active in the container
		 */
		if (super->orom && dl->index < 0 && mpb->num_raid_devs)
			continue;

		e = get_extents(super, dl);
		if (!e)
			continue;
		/* extent lists end with a zero-size sentinel; count entries */
		for (i = 1; e[i-1].size; i++)
			;
		dl->e = e;
		dl->extent_cnt = i;
		extent_cnt += i;
		cnt++;
	}

	maxsize = merge_extents(super, extent_cnt);
	minsize = size;
	if (size == 0)
		/* chunk is in K */
		minsize = chunk * 2;

	/* an orom-constrained container must use every in-use disk */
	if (cnt < raiddisks ||
	    (super->orom && used && used != raiddisks) ||
	    maxsize < minsize ||
	    maxsize == 0) {
		fprintf(stderr, Name ": not enough devices with space to create array.\n");
		return 0; /* No enough free spaces large enough */
	}

	if (size == 0) {
		size = maxsize;
		if (chunk) {
			/* round down to a whole number of stripes
			 * (chunk is KiB, sizes are 512-byte sectors) */
			size /= 2 * chunk;
			size *= 2 * chunk;
		}
	}

	/* assign raid slots in disk-list order to every selected disk */
	cnt = 0;
	for (dl = super->disks; dl; dl = dl->next)
		if (dl->e)
			dl->raiddisk = cnt++;

	*freesize = size;

	return 1;
}
4988
/* Top-level geometry validation dispatcher for the imsm format.
 * Routes to the container, autolayout, or volume validators depending
 * on level/dev/st->sb, loading the owning container first when @dev
 * turns out to be a busy member of an existing imsm container.
 * Returns 1 when creation can proceed, 0 otherwise.
 */
static int validate_geometry_imsm(struct supertype *st, int level, int layout,
				  int raiddisks, int *chunk, unsigned long long size,
				  char *dev, unsigned long long *freesize,
				  int verbose)
{
	int fd, cfd;
	struct mdinfo *sra;
	int is_member = 0;

	/* load capability
	 * if given unused devices create a container
	 * if given given devices in a container create a member volume
	 */
	if (level == LEVEL_CONTAINER) {
		/* Must be a fresh device to add to a container */
		return validate_geometry_imsm_container(st, level, layout,
							raiddisks,
							chunk?*chunk:0, size,
							dev, freesize,
							verbose);
	}

	if (!dev) {
		if (st->sb && freesize) {
			/* we are being asked to automatically layout a
			 * new volume based on the current contents of
			 * the container. If the the parameters can be
			 * satisfied reserve_space will record the disks,
			 * start offset, and size of the volume to be
			 * created. add_to_super and getinfo_super
			 * detect when autolayout is in progress.
			 */
			if (!validate_geometry_imsm_orom(st->sb, level, layout,
							 raiddisks, chunk,
							 verbose))
				return 0;
			return reserve_space(st, raiddisks, size,
					     chunk?*chunk:0, freesize);
		}
		return 1;
	}
	if (st->sb) {
		/* creating in a given container */
		return validate_geometry_imsm_volume(st, level, layout,
						     raiddisks, chunk, size,
						     dev, freesize, verbose);
	}

	/* This device needs to be a device in an 'imsm' container */
	fd = open(dev, O_RDONLY|O_EXCL, 0);
	if (fd >= 0) {
		/* exclusive open succeeded: the device is unused, so it
		 * cannot be a member of an assembled container */
		if (verbose)
			fprintf(stderr,
				Name ": Cannot create this array on device %s\n",
				dev);
		close(fd);
		return 0;
	}
	if (errno != EBUSY || (fd = open(dev, O_RDONLY, 0)) < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot open %s: %s\n",
				dev, strerror(errno));
		return 0;
	}
	/* Well, it is in use by someone, maybe an 'imsm' container. */
	cfd = open_container(fd);
	close(fd);
	if (cfd < 0) {
		if (verbose)
			fprintf(stderr, Name ": Cannot use %s: It is busy\n",
				dev);
		return 0;
	}
	sra = sysfs_read(cfd, 0, GET_VERSION);
	if (sra && sra->array.major_version == -1 &&
	    strcmp(sra->text_version, "imsm") == 0)
		is_member = 1;
	sysfs_free(sra);
	if (is_member) {
		/* This is a member of a imsm container. Load the container
		 * and try to create a volume
		 */
		struct intel_super *super;

		if (load_super_imsm_all(st, cfd, (void **) &super, NULL) == 0) {
			st->sb = super;
			st->container_dev = fd2devnum(cfd);
			close(cfd);
			return validate_geometry_imsm_volume(st, level, layout,
							     raiddisks, chunk,
							     size, dev,
							     freesize, verbose);
		}
	}

	if (verbose)
		fprintf(stderr, Name ": failed container membership check\n");

	close(cfd);
	return 0;
}
5090
5091 static void default_geometry_imsm(struct supertype *st, int *level, int *layout, int *chunk)
5092 {
5093 struct intel_super *super = st->sb;
5094
5095 if (level && *level == UnSet)
5096 *level = LEVEL_CONTAINER;
5097
5098 if (level && layout && *layout == UnSet)
5099 *layout = imsm_level_to_layout(*level);
5100
5101 if (chunk && (*chunk == UnSet || *chunk == 0) &&
5102 super && super->orom)
5103 *chunk = imsm_orom_default_chunk(super->orom);
5104 }
5105
5106 static void handle_missing(struct intel_super *super, struct imsm_dev *dev);
5107
/* Delete the subarray referenced by super->current_vol from the
 * metadata.  Refuses (returns 2) when no subarray is selected, on
 * allocation failure, or when deletion would renumber - and therefore
 * change the UUID of - an active subarray.  In mdmon context the
 * removal is queued as a metadata update; otherwise it is applied to
 * the in-memory metadata directly.  Returns 0 on success.
 */
static int kill_subarray_imsm(struct supertype *st)
{
	/* remove the subarray currently referenced by ->current_vol */
	__u8 i;
	struct intel_dev **dp;
	struct intel_super *super = st->sb;
	__u8 current_vol = super->current_vol;
	struct imsm_super *mpb = super->anchor;

	if (super->current_vol < 0)
		return 2;
	super->current_vol = -1; /* invalidate subarray cursor */

	/* block deletions that would change the uuid of active subarrays
	 *
	 * FIXME when immutable ids are available, but note that we'll
	 * also need to fixup the invalidated/active subarray indexes in
	 * mdstat
	 */
	for (i = 0; i < mpb->num_raid_devs; i++) {
		char subarray[4];

		/* only subarrays at or after the deleted index renumber */
		if (i < current_vol)
			continue;
		sprintf(subarray, "%u", i);
		if (is_subarray_active(subarray, st->devname)) {
			fprintf(stderr,
				Name ": deleting subarray-%d would change the UUID of active subarray-%d, aborting\n",
				current_vol, i);

			return 2;
		}
	}

	if (st->update_tail) {
		/* mdmon context: queue the deletion instead of applying it */
		struct imsm_update_kill_array *u = malloc(sizeof(*u));

		if (!u)
			return 2;
		u->type = update_kill_array;
		u->dev_idx = current_vol;
		append_metadata_update(st, u, sizeof(*u));

		return 0;
	}

	/* unlink the volume from the devlist and shift later indexes down */
	for (dp = &super->devlist; *dp;)
		if ((*dp)->index == current_vol) {
			*dp = (*dp)->next;
		} else {
			handle_missing(super, (*dp)->dev);
			if ((*dp)->index > current_vol)
				(*dp)->index--;
			dp = &(*dp)->next;
		}

	/* no more raid devices, all active components are now spares,
	 * but of course failed are still failed
	 */
	if (--mpb->num_raid_devs == 0) {
		struct dl *d;

		for (d = super->disks; d; d = d->next)
			if (d->index > -2) {
				d->index = -1;
				d->disk.status = SPARE_DISK;
			}
	}

	super->updates_pending++;

	return 0;
}
5181
/* Apply an --update operation to the named @subarray.  Only the 'name'
 * update is supported: the new name from @ident is validated, then the
 * rename is queued as a metadata update (mdmon context) or applied to
 * the volume record in place.  Returns 0 on success, 2 on any
 * unsupported update, validation failure, or allocation failure.
 */
static int update_subarray_imsm(struct supertype *st, char *subarray,
				char *update, struct mddev_ident *ident)
{
	/* update the subarray currently referenced by ->current_vol */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;

	if (strcmp(update, "name") == 0) {
		char *name = ident->name;
		char *ep;
		int vol;

		if (is_subarray_active(subarray, st->devname)) {
			fprintf(stderr,
				Name ": Unable to update name of active subarray\n");
			return 2;
		}

		if (!check_name(super, name, 0))
			return 2;

		/* @subarray must parse fully as an in-range volume index */
		vol = strtoul(subarray, &ep, 10);
		if (*ep != '\0' || vol >= super->anchor->num_raid_devs)
			return 2;

		if (st->update_tail) {
			struct imsm_update_rename_array *u = malloc(sizeof(*u));

			if (!u)
				return 2;
			u->type = update_rename_array;
			u->dev_idx = vol;
			snprintf((char *) u->name, MAX_RAID_SERIAL_LEN, "%s", name);
			append_metadata_update(st, u, sizeof(*u));
		} else {
			struct imsm_dev *dev;
			int i;

			dev = get_imsm_dev(super, vol);
			snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
			/* re-evaluate missing-disk state across all volumes */
			for (i = 0; i < mpb->num_raid_devs; i++) {
				dev = get_imsm_dev(super, i);
				handle_missing(super, dev);
			}
			super->updates_pending++;
		}
	} else
		return 2;

	return 0;
}
5233
5234 static int is_gen_migration(struct imsm_dev *dev)
5235 {
5236 if (!dev->vol.migr_state)
5237 return 0;
5238
5239 if (migr_type(dev) == MIGR_GEN_MIGR)
5240 return 1;
5241
5242 return 0;
5243 }
5244 #endif /* MDASSEMBLE */
5245
5246 static int is_rebuilding(struct imsm_dev *dev)
5247 {
5248 struct imsm_map *migr_map;
5249
5250 if (!dev->vol.migr_state)
5251 return 0;
5252
5253 if (migr_type(dev) != MIGR_REBUILD)
5254 return 0;
5255
5256 migr_map = get_imsm_map(dev, 1);
5257
5258 if (migr_map->map_state == IMSM_T_STATE_DEGRADED)
5259 return 1;
5260 else
5261 return 0;
5262 }
5263
5264 static void update_recovery_start(struct intel_super *super,
5265 struct imsm_dev *dev,
5266 struct mdinfo *array)
5267 {
5268 struct mdinfo *rebuild = NULL;
5269 struct mdinfo *d;
5270 __u32 units;
5271
5272 if (!is_rebuilding(dev))
5273 return;
5274
5275 /* Find the rebuild target, but punt on the dual rebuild case */
5276 for (d = array->devs; d; d = d->next)
5277 if (d->recovery_start == 0) {
5278 if (rebuild)
5279 return;
5280 rebuild = d;
5281 }
5282
5283 if (!rebuild) {
5284 /* (?) none of the disks are marked with
5285 * IMSM_ORD_REBUILD, so assume they are missing and the
5286 * disk_ord_tbl was not correctly updated
5287 */
5288 dprintf("%s: failed to locate out-of-sync disk\n", __func__);
5289 return;
5290 }
5291
5292 units = __le32_to_cpu(dev->vol.curr_migr_unit);
5293 rebuild->recovery_start = units * blocks_per_migr_unit(super, dev);
5294 }
5295
5296 static int recover_backup_imsm(struct supertype *st, struct mdinfo *info);
5297
5298 static struct mdinfo *container_content_imsm(struct supertype *st, char *subarray)
5299 {
5300 /* Given a container loaded by load_super_imsm_all,
5301 * extract information about all the arrays into
5302 * an mdinfo tree.
5303 * If 'subarray' is given, just extract info about that array.
5304 *
5305 * For each imsm_dev create an mdinfo, fill it in,
5306 * then look for matching devices in super->disks
5307 * and create appropriate device mdinfo.
5308 */
5309 struct intel_super *super = st->sb;
5310 struct imsm_super *mpb = super->anchor;
5311 struct mdinfo *rest = NULL;
5312 unsigned int i;
5313 int bbm_errors = 0;
5314 struct dl *d;
5315 int spare_disks = 0;
5316
5317 /* check for bad blocks */
5318 if (imsm_bbm_log_size(super->anchor))
5319 bbm_errors = 1;
5320
5321 /* count spare devices, not used in maps
5322 */
5323 for (d = super->disks; d; d = d->next)
5324 if (d->index == -1)
5325 spare_disks++;
5326
5327 for (i = 0; i < mpb->num_raid_devs; i++) {
5328 struct imsm_dev *dev;
5329 struct imsm_map *map;
5330 struct imsm_map *map2;
5331 struct mdinfo *this;
5332 int slot, chunk;
5333 char *ep;
5334
5335 if (subarray &&
5336 (i != strtoul(subarray, &ep, 10) || *ep != '\0'))
5337 continue;
5338
5339 dev = get_imsm_dev(super, i);
5340 map = get_imsm_map(dev, 0);
5341 map2 = get_imsm_map(dev, 1);
5342
5343 /* do not publish arrays that are in the middle of an
5344 * unsupported migration
5345 */
5346 if (dev->vol.migr_state &&
5347 (migr_type(dev) == MIGR_STATE_CHANGE)) {
5348 fprintf(stderr, Name ": cannot assemble volume '%.16s':"
5349 " unsupported migration in progress\n",
5350 dev->volume);
5351 continue;
5352 }
5353 /* do not publish arrays that are not support by controller's
5354 * OROM/EFI
5355 */
5356
5357 chunk = __le16_to_cpu(map->blocks_per_strip) >> 1;
5358 #ifndef MDASSEMBLE
5359 if (!validate_geometry_imsm_orom(super,
5360 get_imsm_raid_level(map), /* RAID level */
5361 imsm_level_to_layout(get_imsm_raid_level(map)),
5362 map->num_members, /* raid disks */
5363 &chunk,
5364 1 /* verbose */)) {
5365 fprintf(stderr, Name ": RAID gemetry validation failed. "
5366 "Cannot proceed with the action(s).\n");
5367 continue;
5368 }
5369 #endif /* MDASSEMBLE */
5370 this = malloc(sizeof(*this));
5371 if (!this) {
5372 fprintf(stderr, Name ": failed to allocate %zu bytes\n",
5373 sizeof(*this));
5374 break;
5375 }
5376 memset(this, 0, sizeof(*this));
5377 this->next = rest;
5378
5379 super->current_vol = i;
5380 getinfo_super_imsm_volume(st, this, NULL);
5381 for (slot = 0 ; slot < map->num_members; slot++) {
5382 unsigned long long recovery_start;
5383 struct mdinfo *info_d;
5384 struct dl *d;
5385 int idx;
5386 int skip;
5387 __u32 ord;
5388
5389 skip = 0;
5390 idx = get_imsm_disk_idx(dev, slot, 0);
5391 ord = get_imsm_ord_tbl_ent(dev, slot, -1);
5392 for (d = super->disks; d ; d = d->next)
5393 if (d->index == idx)
5394 break;
5395
5396 recovery_start = MaxSector;
5397 if (d == NULL)
5398 skip = 1;
5399 if (d && is_failed(&d->disk))
5400 skip = 1;
5401 if (ord & IMSM_ORD_REBUILD)
5402 recovery_start = 0;
5403
5404 /*
5405 * if we skip some disks the array will be assmebled degraded;
5406 * reset resync start to avoid a dirty-degraded
5407 * situation when performing the intial sync
5408 *
5409 * FIXME handle dirty degraded
5410 */
5411 if ((skip || recovery_start == 0) && !dev->vol.dirty)
5412 this->resync_start = MaxSector;
5413 if (skip)
5414 continue;
5415
5416 info_d = calloc(1, sizeof(*info_d));
5417 if (!info_d) {
5418 fprintf(stderr, Name ": failed to allocate disk"
5419 " for volume %.16s\n", dev->volume);
5420 info_d = this->devs;
5421 while (info_d) {
5422 struct mdinfo *d = info_d->next;
5423
5424 free(info_d);
5425 info_d = d;
5426 }
5427 free(this);
5428 this = rest;
5429 break;
5430 }
5431 info_d->next = this->devs;
5432 this->devs = info_d;
5433
5434 info_d->disk.number = d->index;
5435 info_d->disk.major = d->major;
5436 info_d->disk.minor = d->minor;
5437 info_d->disk.raid_disk = slot;
5438 info_d->recovery_start = recovery_start;
5439 if (map2) {
5440 if (slot < map2->num_members)
5441 info_d->disk.state = (1 << MD_DISK_ACTIVE);
5442 else
5443 this->array.spare_disks++;
5444 } else {
5445 if (slot < map->num_members)
5446 info_d->disk.state = (1 << MD_DISK_ACTIVE);
5447 else
5448 this->array.spare_disks++;
5449 }
5450 if (info_d->recovery_start == MaxSector)
5451 this->array.working_disks++;
5452
5453 info_d->events = __le32_to_cpu(mpb->generation_num);
5454 info_d->data_offset = __le32_to_cpu(map->pba_of_lba0);
5455 info_d->component_size = __le32_to_cpu(map->blocks_per_member);
5456 }
5457 /* now that the disk list is up-to-date fixup recovery_start */
5458 update_recovery_start(super, dev, this);
5459 this->array.spare_disks += spare_disks;
5460
5461 /* check for reshape */
5462 if (this->reshape_active == 1)
5463 recover_backup_imsm(st, this);
5464
5465 rest = this;
5466 }
5467
5468 /* if array has bad blocks, set suitable bit in array status */
5469 if (bbm_errors)
5470 rest->array.state |= (1<<MD_SB_BBM_ERRORS);
5471
5472 return rest;
5473 }
5474
5475
/* Map a failure count to the appropriate map state for @dev.
 * @failed: number of failed/rebuilding members (cf. imsm_count_failed)
 * Returns one of IMSM_T_STATE_{UNINITIALIZED,NORMAL,DEGRADED,FAILED};
 * unknown raid levels fall through to the current map state.
 */
static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev, int failed)
{
	struct imsm_map *map = get_imsm_map(dev, 0);

	/* no failures: preserve UNINITIALIZED, otherwise report NORMAL */
	if (!failed)
		return map->map_state == IMSM_T_STATE_UNINITIALIZED ?
			IMSM_T_STATE_UNINITIALIZED : IMSM_T_STATE_NORMAL;

	switch (get_imsm_raid_level(map)) {
	case 0:
		/* raid0 has no redundancy: any failure is fatal */
		return IMSM_T_STATE_FAILED;
		break;
	case 1:
		/* raid1/raid1e survives while any member remains */
		if (failed < map->num_members)
			return IMSM_T_STATE_DEGRADED;
		else
			return IMSM_T_STATE_FAILED;
		break;
	case 10:
	{
		/**
		 * check to see if any mirrors have failed, otherwise we
		 * are degraded.  Even numbered slots are mirrored on
		 * slot+1
		 */
		int i;
		/* gcc -Os complains that this is unused */
		int insync = insync;

		for (i = 0; i < map->num_members; i++) {
			__u32 ord = get_imsm_ord_tbl_ent(dev, i, -1);
			int idx = ord_to_idx(ord);
			struct imsm_disk *disk;

			/* reset the potential in-sync count on even-numbered
			 * slots.  num_copies is always 2 for imsm raid10
			 */
			if ((i & 1) == 0)
				insync = 2;

			disk = get_imsm_disk(super, idx);
			if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
				insync--;

			/* no in-sync disks left in this mirror the
			 * array has failed
			 */
			if (insync == 0)
				return IMSM_T_STATE_FAILED;
		}

		return IMSM_T_STATE_DEGRADED;
	}
	case 5:
		/* raid5 tolerates exactly one failure */
		if (failed < 2)
			return IMSM_T_STATE_DEGRADED;
		else
			return IMSM_T_STATE_FAILED;
		break;
	default:
		break;
	}

	return map->map_state;
}
5541
5542 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev)
5543 {
5544 int i;
5545 int failed = 0;
5546 struct imsm_disk *disk;
5547 struct imsm_map *map = get_imsm_map(dev, 0);
5548 struct imsm_map *prev = get_imsm_map(dev, dev->vol.migr_state);
5549 __u32 ord;
5550 int idx;
5551
5552 /* at the beginning of migration we set IMSM_ORD_REBUILD on
5553 * disks that are being rebuilt. New failures are recorded to
5554 * map[0]. So we look through all the disks we started with and
5555 * see if any failures are still present, or if any new ones
5556 * have arrived
5557 *
5558 * FIXME add support for online capacity expansion and
5559 * raid-level-migration
5560 */
5561 for (i = 0; i < prev->num_members; i++) {
5562 ord = __le32_to_cpu(prev->disk_ord_tbl[i]);
5563 ord |= __le32_to_cpu(map->disk_ord_tbl[i]);
5564 idx = ord_to_idx(ord);
5565
5566 disk = get_imsm_disk(super, idx);
5567 if (!disk || is_failed(disk) || ord & IMSM_ORD_REBUILD)
5568 failed++;
5569 }
5570
5571 return failed;
5572 }
5573
5574 #ifndef MDASSEMBLE
5575 static int imsm_open_new(struct supertype *c, struct active_array *a,
5576 char *inst)
5577 {
5578 struct intel_super *super = c->sb;
5579 struct imsm_super *mpb = super->anchor;
5580
5581 if (atoi(inst) >= mpb->num_raid_devs) {
5582 fprintf(stderr, "%s: subarry index %d, out of range\n",
5583 __func__, atoi(inst));
5584 return -ENODEV;
5585 }
5586
5587 dprintf("imsm: open_new %s\n", inst);
5588 a->info.container_member = atoi(inst);
5589 return 0;
5590 }
5591
5592 static int is_resyncing(struct imsm_dev *dev)
5593 {
5594 struct imsm_map *migr_map;
5595
5596 if (!dev->vol.migr_state)
5597 return 0;
5598
5599 if (migr_type(dev) == MIGR_INIT ||
5600 migr_type(dev) == MIGR_REPAIR)
5601 return 1;
5602
5603 if (migr_type(dev) == MIGR_GEN_MIGR)
5604 return 0;
5605
5606 migr_map = get_imsm_map(dev, 1);
5607
5608 if ((migr_map->map_state == IMSM_T_STATE_NORMAL) &&
5609 (dev->vol.migr_type != MIGR_GEN_MIGR))
5610 return 1;
5611 else
5612 return 0;
5613 }
5614
5615 /* return true if we recorded new information */
5616 static int mark_failure(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5617 {
5618 __u32 ord;
5619 int slot;
5620 struct imsm_map *map;
5621
5622 /* new failures are always set in map[0] */
5623 map = get_imsm_map(dev, 0);
5624
5625 slot = get_imsm_disk_slot(map, idx);
5626 if (slot < 0)
5627 return 0;
5628
5629 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
5630 if (is_failed(disk) && (ord & IMSM_ORD_REBUILD))
5631 return 0;
5632
5633 disk->status |= FAILED_DISK;
5634 set_imsm_ord_tbl_ent(map, slot, idx | IMSM_ORD_REBUILD);
5635 if (map->failed_disk_num == 0xff)
5636 map->failed_disk_num = slot;
5637 return 1;
5638 }
5639
5640 static void mark_missing(struct imsm_dev *dev, struct imsm_disk *disk, int idx)
5641 {
5642 mark_failure(dev, disk, idx);
5643
5644 if (disk->scsi_id == __cpu_to_le32(~(__u32)0))
5645 return;
5646
5647 disk->scsi_id = __cpu_to_le32(~(__u32)0);
5648 memmove(&disk->serial[0], &disk->serial[1], MAX_RAID_SERIAL_LEN - 1);
5649 }
5650
5651 static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
5652 {
5653 __u8 map_state;
5654 struct dl *dl;
5655 int failed;
5656
5657 if (!super->missing)
5658 return;
5659 failed = imsm_count_failed(super, dev);
5660 map_state = imsm_check_degraded(super, dev, failed);
5661
5662 dprintf("imsm: mark missing\n");
5663 end_migration(dev, map_state);
5664 for (dl = super->missing; dl; dl = dl->next)
5665 mark_missing(dev, &dl->disk, dl->index);
5666 super->updates_pending++;
5667 }
5668
5669 static unsigned long long imsm_set_array_size(struct imsm_dev *dev)
5670 {
5671 int used_disks = imsm_num_data_members(dev, 0);
5672 unsigned long long array_blocks;
5673 struct imsm_map *map;
5674
5675 if (used_disks == 0) {
5676 /* when problems occures
5677 * return current array_blocks value
5678 */
5679 array_blocks = __le32_to_cpu(dev->size_high);
5680 array_blocks = array_blocks << 32;
5681 array_blocks += __le32_to_cpu(dev->size_low);
5682
5683 return array_blocks;
5684 }
5685
5686 /* set array size in metadata
5687 */
5688 map = get_imsm_map(dev, 0);
5689 array_blocks = map->blocks_per_member * used_disks;
5690
5691 /* round array size down to closest MB
5692 */
5693 array_blocks = (array_blocks >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
5694 dev->size_low = __cpu_to_le32((__u32)array_blocks);
5695 dev->size_high = __cpu_to_le32((__u32)(array_blocks >> 32));
5696
5697 return array_blocks;
5698 }
5699
5700 static void imsm_set_disk(struct active_array *a, int n, int state);
5701
5702 static void imsm_progress_container_reshape(struct intel_super *super)
5703 {
5704 /* if no device has a migr_state, but some device has a
5705 * different number of members than the previous device, start
5706 * changing the number of devices in this device to match
5707 * previous.
5708 */
5709 struct imsm_super *mpb = super->anchor;
5710 int prev_disks = -1;
5711 int i;
5712 int copy_map_size;
5713
5714 for (i = 0; i < mpb->num_raid_devs; i++) {
5715 struct imsm_dev *dev = get_imsm_dev(super, i);
5716 struct imsm_map *map = get_imsm_map(dev, 0);
5717 struct imsm_map *map2;
5718 int prev_num_members;
5719
5720 if (dev->vol.migr_state)
5721 return;
5722
5723 if (prev_disks == -1)
5724 prev_disks = map->num_members;
5725 if (prev_disks == map->num_members)
5726 continue;
5727
5728 /* OK, this array needs to enter reshape mode.
5729 * i.e it needs a migr_state
5730 */
5731
5732 copy_map_size = sizeof_imsm_map(map);
5733 prev_num_members = map->num_members;
5734 map->num_members = prev_disks;
5735 dev->vol.migr_state = 1;
5736 dev->vol.curr_migr_unit = 0;
5737 dev->vol.migr_type = MIGR_GEN_MIGR;
5738 for (i = prev_num_members;
5739 i < map->num_members; i++)
5740 set_imsm_ord_tbl_ent(map, i, i);
5741 map2 = get_imsm_map(dev, 1);
5742 /* Copy the current map */
5743 memcpy(map2, map, copy_map_size);
5744 map2->num_members = prev_num_members;
5745
5746 imsm_set_array_size(dev);
5747 super->updates_pending++;
5748 }
5749 }
5750
/* Handle dirty -> clean transitions, resync and reshape. Degraded and rebuild
 * states are handled in imsm_set_disk() with one exception, when a
 * resync is stopped due to a new failure this routine will set the
 * 'degraded' state for the array.
 *
 * consistent == 2 is a special request: missing disks are reconciled
 * first, and the request is demoted to dirty (0) unless resync is
 * complete, the map is NORMAL, and no migration is active.
 * Returns the possibly-adjusted 'consistent' value.
 */
static int imsm_set_array_state(struct active_array *a, int consistent)
{
	int inst = a->info.container_member;
	struct intel_super *super = a->container->sb;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	int failed = imsm_count_failed(super, dev);
	__u8 map_state = imsm_check_degraded(super, dev, failed);
	__u32 blocks_per_unit;

	if (dev->vol.migr_state &&
	    dev->vol.migr_type == MIGR_GEN_MIGR) {
		/* array state change is blocked due to reshape action
		 * We might need to
		 * - abort the reshape (if last_checkpoint is 0 and action!= reshape)
		 * - finish the reshape (if last_checkpoint is big and action != reshape)
		 * - update curr_migr_unit
		 */
		if (a->curr_action == reshape) {
			/* still reshaping, maybe update curr_migr_unit */
			goto mark_checkpoint;
		} else {
			if (a->last_checkpoint == 0 && a->prev_action == reshape) {
				/* for some reason we aborted the reshape.
				 * Better clean up
				 */
				struct imsm_map *map2 = get_imsm_map(dev, 1);
				dev->vol.migr_state = 0;
				dev->vol.migr_type = 0;
				dev->vol.curr_migr_unit = 0;
				/* restore the pre-migration map over map[0] */
				memcpy(map, map2, sizeof_imsm_map(map2));
				super->updates_pending++;
			}
			if (a->last_checkpoint >= a->info.component_size) {
				unsigned long long array_blocks;
				int used_disks;
				struct mdinfo *mdi;

				used_disks = imsm_num_data_members(dev, 0);
				if (used_disks > 0) {
					/* NOTE(review): 32-bit multiply —
					 * may truncate for very large
					 * volumes; compare with the widened
					 * form used elsewhere — confirm */
					array_blocks =
						map->blocks_per_member *
						used_disks;
					/* round array size down to closest MB
					 */
					array_blocks = (array_blocks
							>> SECT_PER_MB_SHIFT)
						<< SECT_PER_MB_SHIFT;
					a->info.custom_array_size = array_blocks;
					/* encourage manager to update array
					 * size
					 */

					a->check_reshape = 1;
				}
				/* finalize online capacity expansion/reshape */
				for (mdi = a->info.devs; mdi; mdi = mdi->next)
					imsm_set_disk(a,
						      mdi->disk.raid_disk,
						      mdi->curr_state);

				imsm_progress_container_reshape(super);
			}
		}
	}

	/* before we activate this array handle any missing disks */
	if (consistent == 2)
		handle_missing(super, dev);

	if (consistent == 2 &&
	    (!is_resync_complete(&a->info) ||
	     map_state != IMSM_T_STATE_NORMAL ||
	     dev->vol.migr_state))
		consistent = 0;

	if (is_resync_complete(&a->info)) {
		/* complete intialization / resync,
		 * recovery and interrupted recovery is completed in
		 * ->set_disk
		 */
		if (is_resyncing(dev)) {
			dprintf("imsm: mark resync done\n");
			end_migration(dev, map_state);
			super->updates_pending++;
			a->last_checkpoint = 0;
		}
	} else if (!is_resyncing(dev) && !failed) {
		/* mark the start of the init process if nothing is failed */
		dprintf("imsm: mark resync start\n");
		if (map->map_state == IMSM_T_STATE_UNINITIALIZED)
			migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_INIT);
		else
			migrate(dev, super, IMSM_T_STATE_NORMAL, MIGR_REPAIR);
		super->updates_pending++;
	}

mark_checkpoint:
	/* check if we can update curr_migr_unit from resync_start, recovery_start */
	blocks_per_unit = blocks_per_migr_unit(super, dev);
	if (blocks_per_unit) {
		__u32 units32;
		__u64 units;

		units = a->last_checkpoint / blocks_per_unit;
		units32 = units;

		/* check that we did not overflow 32-bits, and that
		 * curr_migr_unit needs updating
		 */
		if (units32 == units &&
		    units32 != 0 &&
		    __le32_to_cpu(dev->vol.curr_migr_unit) != units32) {
			dprintf("imsm: mark checkpoint (%u)\n", units32);
			dev->vol.curr_migr_unit = __cpu_to_le32(units32);
			super->updates_pending++;
		}
	}

	/* mark dirty / clean */
	if (dev->vol.dirty != !consistent) {
		dprintf("imsm: mark '%s'\n", consistent ? "clean" : "dirty");
		if (consistent)
			dev->vol.dirty = 0;
		else
			dev->vol.dirty = 1;
		super->updates_pending++;
	}

	return consistent;
}
5887
5888 static void imsm_set_disk(struct active_array *a, int n, int state)
5889 {
5890 int inst = a->info.container_member;
5891 struct intel_super *super = a->container->sb;
5892 struct imsm_dev *dev = get_imsm_dev(super, inst);
5893 struct imsm_map *map = get_imsm_map(dev, 0);
5894 struct imsm_disk *disk;
5895 int failed;
5896 __u32 ord;
5897 __u8 map_state;
5898
5899 if (n > map->num_members)
5900 fprintf(stderr, "imsm: set_disk %d out of range 0..%d\n",
5901 n, map->num_members - 1);
5902
5903 if (n < 0)
5904 return;
5905
5906 dprintf("imsm: set_disk %d:%x\n", n, state);
5907
5908 ord = get_imsm_ord_tbl_ent(dev, n, -1);
5909 disk = get_imsm_disk(super, ord_to_idx(ord));
5910
5911 /* check for new failures */
5912 if (state & DS_FAULTY) {
5913 if (mark_failure(dev, disk, ord_to_idx(ord)))
5914 super->updates_pending++;
5915 }
5916
5917 /* check if in_sync */
5918 if (state & DS_INSYNC && ord & IMSM_ORD_REBUILD && is_rebuilding(dev)) {
5919 struct imsm_map *migr_map = get_imsm_map(dev, 1);
5920
5921 set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
5922 super->updates_pending++;
5923 }
5924
5925 failed = imsm_count_failed(super, dev);
5926 map_state = imsm_check_degraded(super, dev, failed);
5927
5928 /* check if recovery complete, newly degraded, or failed */
5929 if (map_state == IMSM_T_STATE_NORMAL && is_rebuilding(dev)) {
5930 end_migration(dev, map_state);
5931 map = get_imsm_map(dev, 0);
5932 map->failed_disk_num = ~0;
5933 super->updates_pending++;
5934 a->last_checkpoint = 0;
5935 } else if (map_state == IMSM_T_STATE_DEGRADED &&
5936 map->map_state != map_state &&
5937 !dev->vol.migr_state) {
5938 dprintf("imsm: mark degraded\n");
5939 map->map_state = map_state;
5940 super->updates_pending++;
5941 a->last_checkpoint = 0;
5942 } else if (map_state == IMSM_T_STATE_FAILED &&
5943 map->map_state != map_state) {
5944 dprintf("imsm: mark failed\n");
5945 end_migration(dev, map_state);
5946 super->updates_pending++;
5947 a->last_checkpoint = 0;
5948 } else if (is_gen_migration(dev)) {
5949 dprintf("imsm: Detected General Migration in state: ");
5950 if (map_state == IMSM_T_STATE_NORMAL) {
5951 end_migration(dev, map_state);
5952 map = get_imsm_map(dev, 0);
5953 map->failed_disk_num = ~0;
5954 dprintf("normal\n");
5955 } else {
5956 if (map_state == IMSM_T_STATE_DEGRADED) {
5957 printf("degraded\n");
5958 end_migration(dev, map_state);
5959 } else {
5960 dprintf("failed\n");
5961 }
5962 map->map_state = map_state;
5963 }
5964 super->updates_pending++;
5965 }
5966 }
5967
5968 static int store_imsm_mpb(int fd, struct imsm_super *mpb)
5969 {
5970 void *buf = mpb;
5971 __u32 mpb_size = __le32_to_cpu(mpb->mpb_size);
5972 unsigned long long dsize;
5973 unsigned long long sectors;
5974
5975 get_dev_size(fd, NULL, &dsize);
5976
5977 if (mpb_size > 512) {
5978 /* -1 to account for anchor */
5979 sectors = mpb_sectors(mpb) - 1;
5980
5981 /* write the extended mpb to the sectors preceeding the anchor */
5982 if (lseek64(fd, dsize - (512 * (2 + sectors)), SEEK_SET) < 0)
5983 return 1;
5984
5985 if ((unsigned long long)write(fd, buf + 512, 512 * sectors)
5986 != 512 * sectors)
5987 return 1;
5988 }
5989
5990 /* first block is stored on second to last sector of the disk */
5991 if (lseek64(fd, dsize - (512 * 2), SEEK_SET) < 0)
5992 return 1;
5993
5994 if (write(fd, buf, 512) != 512)
5995 return 1;
5996
5997 return 0;
5998 }
5999
6000 static void imsm_sync_metadata(struct supertype *container)
6001 {
6002 struct intel_super *super = container->sb;
6003
6004 dprintf("sync metadata: %d\n", super->updates_pending);
6005 if (!super->updates_pending)
6006 return;
6007
6008 write_super_imsm(container, 0);
6009
6010 super->updates_pending = 0;
6011 }
6012
6013 static struct dl *imsm_readd(struct intel_super *super, int idx, struct active_array *a)
6014 {
6015 struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
6016 int i = get_imsm_disk_idx(dev, idx, -1);
6017 struct dl *dl;
6018
6019 for (dl = super->disks; dl; dl = dl->next)
6020 if (dl->index == i)
6021 break;
6022
6023 if (dl && is_failed(&dl->disk))
6024 dl = NULL;
6025
6026 if (dl)
6027 dprintf("%s: found %x:%x\n", __func__, dl->major, dl->minor);
6028
6029 return dl;
6030 }
6031
/* Select a container disk that can serve as a spare for slot @slot of
 * array @a.  A candidate must not already belong to the array or appear
 * in @additional_test_list, must not be failed or in use, and must have
 * free extents covering every member volume's data region.  With
 * @activate_new == 0 only partially-assimilated disks are considered;
 * with 1, pristine spares (index == -1) qualify too.
 * Returns the chosen disk, or NULL when none fits (the final 'return dl'
 * executes with dl == NULL after the list is exhausted).
 */
static struct dl *imsm_add_spare(struct intel_super *super, int slot,
				 struct active_array *a, int activate_new,
				 struct mdinfo *additional_test_list)
{
	struct imsm_dev *dev = get_imsm_dev(super, a->info.container_member);
	int idx = get_imsm_disk_idx(dev, slot, -1);
	struct imsm_super *mpb = super->anchor;
	struct imsm_map *map;
	unsigned long long pos;
	struct mdinfo *d;
	struct extent *ex;
	int i, j;
	int found;
	__u32 array_start = 0;
	__u32 array_end = 0;
	struct dl *dl;
	struct mdinfo *test_list;

	for (dl = super->disks; dl; dl = dl->next) {
		/* If in this array, skip */
		for (d = a->info.devs ; d ; d = d->next)
			if (d->state_fd >= 0 &&
			    d->disk.major == dl->major &&
			    d->disk.minor == dl->minor) {
				dprintf("%x:%x already in array\n",
					dl->major, dl->minor);
				break;
			}
		if (d)
			continue;
		test_list = additional_test_list;
		while (test_list) {
			if (test_list->disk.major == dl->major &&
			    test_list->disk.minor == dl->minor) {
				dprintf("%x:%x already in additional test list\n",
					dl->major, dl->minor);
				break;
			}
			test_list = test_list->next;
		}
		if (test_list)
			continue;

		/* skip in use or failed drives */
		if (is_failed(&dl->disk) || idx == dl->index ||
		    dl->index == -2) {
			dprintf("%x:%x status (failed: %d index: %d)\n",
				dl->major, dl->minor, is_failed(&dl->disk), idx);
			continue;
		}

		/* skip pure spares when we are looking for partially
		 * assimilated drives
		 */
		if (dl->index == -1 && !activate_new)
			continue;

		/* Does this unused device have the requisite free space?
		 * It needs to be able to cover all member volumes
		 */
		ex = get_extents(super, dl);
		if (!ex) {
			dprintf("cannot get extents\n");
			continue;
		}
		for (i = 0; i < mpb->num_raid_devs; i++) {
			dev = get_imsm_dev(super, i);
			map = get_imsm_map(dev, 0);

			/* check if this disk is already a member of
			 * this array
			 */
			if (get_imsm_disk_slot(map, dl->index) >= 0)
				continue;

			found = 0;
			j = 0;
			pos = 0;
			array_start = __le32_to_cpu(map->pba_of_lba0);
			array_end = array_start +
				    __le32_to_cpu(map->blocks_per_member) - 1;

			/* walk the gaps between allocated extents; a gap
			 * from 'pos' to ex[j].start can host the volume if
			 * it fully contains [array_start, array_end]
			 */
			do {
				/* check that we can start at pba_of_lba0 with
				 * blocks_per_member of space
				 */
				if (array_start >= pos && array_end < ex[j].start) {
					found = 1;
					break;
				}
				pos = ex[j].start + ex[j].size;
				j++;
			} while (ex[j-1].size);

			if (!found)
				break;
		}

		free(ex);
		if (i < mpb->num_raid_devs) {
			dprintf("%x:%x does not have %u to %u available\n",
				dl->major, dl->minor, array_start, array_end);
			/* No room */
			continue;
		}
		return dl;
	}

	return dl;
}
6142
6143
6144 static int imsm_rebuild_allowed(struct supertype *cont, int dev_idx, int failed)
6145 {
6146 struct imsm_dev *dev2;
6147 struct imsm_map *map;
6148 struct dl *idisk;
6149 int slot;
6150 int idx;
6151 __u8 state;
6152
6153 dev2 = get_imsm_dev(cont->sb, dev_idx);
6154 if (dev2) {
6155 state = imsm_check_degraded(cont->sb, dev2, failed);
6156 if (state == IMSM_T_STATE_FAILED) {
6157 map = get_imsm_map(dev2, 0);
6158 if (!map)
6159 return 1;
6160 for (slot = 0; slot < map->num_members; slot++) {
6161 /*
6162 * Check if failed disks are deleted from intel
6163 * disk list or are marked to be deleted
6164 */
6165 idx = get_imsm_disk_idx(dev2, slot, -1);
6166 idisk = get_imsm_dl_disk(cont->sb, idx);
6167 /*
6168 * Do not rebuild the array if failed disks
6169 * from failed sub-array are not removed from
6170 * container.
6171 */
6172 if (idisk &&
6173 is_failed(&idisk->disk) &&
6174 (idisk->action != DISK_REMOVE))
6175 return 0;
6176 }
6177 }
6178 }
6179 return 1;
6180 }
6181
static struct mdinfo *imsm_activate_spare(struct active_array *a,
					  struct metadata_update **updates)
{
	/**
	 * Find a device with unused free space and use it to replace a
	 * failed/vacant region in an array. We replace failed regions one a
	 * array at a time. The result is that a new spare disk will be added
	 * to the first failed array and after the monitor has finished
	 * propagating failures the remainder will be consumed.
	 *
	 * FIXME add a capability for mdmon to request spares from another
	 * container.
	 */

	struct intel_super *super = a->container->sb;
	int inst = a->info.container_member;
	struct imsm_dev *dev = get_imsm_dev(super, inst);
	struct imsm_map *map = get_imsm_map(dev, 0);
	/* 'failed' starts at raid_disks and is decremented once per
	 * active member below, leaving the count of vacant/failed slots */
	int failed = a->info.array.raid_disks;
	struct mdinfo *rv = NULL;
	struct mdinfo *d;
	struct mdinfo *di;
	struct metadata_update *mu;
	struct dl *dl;
	struct imsm_update_activate_spare *u;
	int num_spares = 0;
	int i;
	int allowed;

	for (d = a->info.devs ; d ; d = d->next) {
		if ((d->curr_state & DS_FAULTY) &&
			d->state_fd >= 0)
			/* wait for Removal to happen */
			return NULL;
		if (d->state_fd >= 0)
			failed--;
	}

	dprintf("imsm: activate spare: inst=%d failed=%d (%d) level=%d\n",
		inst, failed, a->info.array.raid_disks, a->info.array.level);

	if (dev->vol.migr_state &&
	    dev->vol.migr_type == MIGR_GEN_MIGR)
		/* No repair during migration */
		return NULL;

	if (a->info.array.level == 4)
		/* No repair for takeovered array
		 * imsm doesn't support raid4
		 */
		return NULL;

	if (imsm_check_degraded(super, dev, failed) != IMSM_T_STATE_DEGRADED)
		return NULL;

	/*
	 * If there are any failed disks check state of the other volume.
	 * Block rebuild if the another one is failed until failed disks
	 * are removed from container.
	 */
	if (failed) {
		dprintf("found failed disks in %s, check if there another"
			"failed sub-array.\n",
			dev->volume);
		/* check if states of the other volumes allow for rebuild */
		for (i = 0; i < super->anchor->num_raid_devs; i++) {
			if (i != inst) {
				allowed = imsm_rebuild_allowed(a->container,
							       i, failed);
				if (!allowed)
					return NULL;
			}
		}
	}

	/* For each slot, if it is not working, find a spare */
	for (i = 0; i < a->info.array.raid_disks; i++) {
		for (d = a->info.devs ; d ; d = d->next)
			if (d->disk.raid_disk == i)
				break;
		dprintf("found %d: %p %x\n", i, d, d?d->curr_state:0);
		if (d && (d->state_fd >= 0))
			continue;

		/*
		 * OK, this device needs recovery.  Try to re-add the
		 * previous occupant of this slot, if this fails see if
		 * we can continue the assimilation of a spare that was
		 * partially assimilated, finally try to activate a new
		 * spare.
		 */
		dl = imsm_readd(super, i, a);
		if (!dl)
			dl = imsm_add_spare(super, i, a, 0, NULL);
		if (!dl)
			dl = imsm_add_spare(super, i, a, 1, NULL);
		if (!dl)
			continue;

		/* found a usable disk with enough space */
		di = malloc(sizeof(*di));
		if (!di)
			continue;
		memset(di, 0, sizeof(*di));

		/* dl->index will be -1 in the case we are activating a
		 * pristine spare.  imsm_process_update() will create a
		 * new index in this case.  Once a disk is found to be
		 * failed in all member arrays it is kicked from the
		 * metadata
		 */
		di->disk.number = dl->index;

		/* (ab)use di->devs to store a pointer to the device
		 * we chose
		 */
		di->devs = (struct mdinfo *) dl;

		di->disk.raid_disk = i;
		di->disk.major = dl->major;
		di->disk.minor = dl->minor;
		di->disk.state = 0;
		di->recovery_start = 0;
		di->data_offset = __le32_to_cpu(map->pba_of_lba0);
		di->component_size = a->info.component_size;
		di->container_member = inst;
		super->random = random32();
		di->next = rv;
		rv = di;
		num_spares++;
		dprintf("%x:%x to be %d at %llu\n", dl->major, dl->minor,
			i, di->data_offset);

		/* only one spare is activated per invocation */
		break;
	}

	if (!rv)
		/* No spares found */
		return rv;
	/* Now 'rv' has a list of devices to return.
	 * Create a metadata_update record to update the
	 * disk_ord_tbl for the array
	 */
	mu = malloc(sizeof(*mu));
	if (mu) {
		mu->buf = malloc(sizeof(struct imsm_update_activate_spare) * num_spares);
		if (mu->buf == NULL) {
			free(mu);
			mu = NULL;
		}
	}
	if (!mu) {
		/* allocation failed: free the collected mdinfo list */
		while (rv) {
			struct mdinfo *n = rv->next;

			free(rv);
			rv = n;
		}
		return NULL;
	}

	mu->space = NULL;
	mu->space_list = NULL;
	mu->len = sizeof(struct imsm_update_activate_spare) * num_spares;
	mu->next = *updates;
	u = (struct imsm_update_activate_spare *) mu->buf;

	/* build one update record per chosen spare; records are chained
	 * via u->next within the single mu->buf allocation */
	for (di = rv ; di ; di = di->next) {
		u->type = update_activate_spare;
		u->dl = (struct dl *) di->devs;
		di->devs = NULL;
		u->slot = di->disk.raid_disk;
		u->array = inst;
		u->next = u + 1;
		u++;
	}
	(u-1)->next = NULL;
	*updates = mu;

	return rv;
}
6363
6364 static int disks_overlap(struct intel_super *super, int idx, struct imsm_update_create_array *u)
6365 {
6366 struct imsm_dev *dev = get_imsm_dev(super, idx);
6367 struct imsm_map *map = get_imsm_map(dev, 0);
6368 struct imsm_map *new_map = get_imsm_map(&u->dev, 0);
6369 struct disk_info *inf = get_disk_info(u);
6370 struct imsm_disk *disk;
6371 int i;
6372 int j;
6373
6374 for (i = 0; i < map->num_members; i++) {
6375 disk = get_imsm_disk(super, get_imsm_disk_idx(dev, i, -1));
6376 for (j = 0; j < new_map->num_members; j++)
6377 if (serialcmp(disk->serial, inf[j].serial) == 0)
6378 return 1;
6379 }
6380
6381 return 0;
6382 }
6383
6384
6385 static struct dl *get_disk_super(struct intel_super *super, int major, int minor)
6386 {
6387 struct dl *dl = NULL;
6388 for (dl = super->disks; dl; dl = dl->next)
6389 if ((dl->major == major) && (dl->minor == minor))
6390 return dl;
6391 return NULL;
6392 }
6393
6394 static int remove_disk_super(struct intel_super *super, int major, int minor)
6395 {
6396 struct dl *prev = NULL;
6397 struct dl *dl;
6398
6399 prev = NULL;
6400 for (dl = super->disks; dl; dl = dl->next) {
6401 if ((dl->major == major) && (dl->minor == minor)) {
6402 /* remove */
6403 if (prev)
6404 prev->next = dl->next;
6405 else
6406 super->disks = dl->next;
6407 dl->next = NULL;
6408 __free_imsm_disk(dl);
6409 dprintf("%s: removed %x:%x\n",
6410 __func__, major, minor);
6411 break;
6412 }
6413 prev = dl;
6414 }
6415 return 0;
6416 }
6417
6418 static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index);
6419
6420 static int add_remove_disk_update(struct intel_super *super)
6421 {
6422 int check_degraded = 0;
6423 struct dl *disk = NULL;
6424 /* add/remove some spares to/from the metadata/contrainer */
6425 while (super->disk_mgmt_list) {
6426 struct dl *disk_cfg;
6427
6428 disk_cfg = super->disk_mgmt_list;
6429 super->disk_mgmt_list = disk_cfg->next;
6430 disk_cfg->next = NULL;
6431
6432 if (disk_cfg->action == DISK_ADD) {
6433 disk_cfg->next = super->disks;
6434 super->disks = disk_cfg;
6435 check_degraded = 1;
6436 dprintf("%s: added %x:%x\n",
6437 __func__, disk_cfg->major,
6438 disk_cfg->minor);
6439 } else if (disk_cfg->action == DISK_REMOVE) {
6440 dprintf("Disk remove action processed: %x.%x\n",
6441 disk_cfg->major, disk_cfg->minor);
6442 disk = get_disk_super(super,
6443 disk_cfg->major,
6444 disk_cfg->minor);
6445 if (disk) {
6446 /* store action status */
6447 disk->action = DISK_REMOVE;
6448 /* remove spare disks only */
6449 if (disk->index == -1) {
6450 remove_disk_super(super,
6451 disk_cfg->major,
6452 disk_cfg->minor);
6453 }
6454 }
6455 /* release allocate disk structure */
6456 __free_imsm_disk(disk_cfg);
6457 }
6458 }
6459 return check_degraded;
6460 }
6461
6462
6463 static int apply_reshape_migration_update(struct imsm_update_reshape_migration *u,
6464 struct intel_super *super,
6465 void ***space_list)
6466 {
6467 struct intel_dev *id;
6468 void **tofree = NULL;
6469 int ret_val = 0;
6470
6471 dprintf("apply_reshape_migration_update()\n");
6472 if ((u->subdev < 0) ||
6473 (u->subdev > 1)) {
6474 dprintf("imsm: Error: Wrong subdev: %i\n", u->subdev);
6475 return ret_val;
6476 }
6477 if ((space_list == NULL) || (*space_list == NULL)) {
6478 dprintf("imsm: Error: Memory is not allocated\n");
6479 return ret_val;
6480 }
6481
6482 for (id = super->devlist ; id; id = id->next) {
6483 if (id->index == (unsigned)u->subdev) {
6484 struct imsm_dev *dev = get_imsm_dev(super, u->subdev);
6485 struct imsm_map *map;
6486 struct imsm_dev *new_dev =
6487 (struct imsm_dev *)*space_list;
6488 struct imsm_map *migr_map = get_imsm_map(dev, 1);
6489 int to_state;
6490 struct dl *new_disk;
6491
6492 if (new_dev == NULL)
6493 return ret_val;
6494 *space_list = **space_list;
6495 memcpy(new_dev, dev, sizeof_imsm_dev(dev, 0));
6496 map = get_imsm_map(new_dev, 0);
6497 if (migr_map) {
6498 dprintf("imsm: Error: migration in progress");
6499 return ret_val;
6500 }
6501
6502 to_state = map->map_state;
6503 if ((u->new_level == 5) && (map->raid_level == 0)) {
6504 map->num_members++;
6505 /* this should not happen */
6506 if (u->new_disks[0] < 0) {
6507 map->failed_disk_num =
6508 map->num_members - 1;
6509 to_state = IMSM_T_STATE_DEGRADED;
6510 } else
6511 to_state = IMSM_T_STATE_NORMAL;
6512 }
6513 migrate(new_dev, super, to_state, MIGR_GEN_MIGR);
6514 if (u->new_level > -1)
6515 map->raid_level = u->new_level;
6516 migr_map = get_imsm_map(new_dev, 1);
6517 if ((u->new_level == 5) &&
6518 (migr_map->raid_level == 0)) {
6519 int ord = map->num_members - 1;
6520 migr_map->num_members--;
6521 if (u->new_disks[0] < 0)
6522 ord |= IMSM_ORD_REBUILD;
6523 set_imsm_ord_tbl_ent(map,
6524 map->num_members - 1,
6525 ord);
6526 }
6527 id->dev = new_dev;
6528 tofree = (void **)dev;
6529
6530 /* update chunk size
6531 */
6532 if (u->new_chunksize > 0)
6533 map->blocks_per_strip =
6534 __cpu_to_le16(u->new_chunksize * 2);
6535
6536 /* add disk
6537 */
6538 if ((u->new_level != 5) ||
6539 (migr_map->raid_level != 0) ||
6540 (migr_map->raid_level == map->raid_level))
6541 goto skip_disk_add;
6542
6543 if (u->new_disks[0] >= 0) {
6544 /* use passes spare
6545 */
6546 new_disk = get_disk_super(super,
6547 major(u->new_disks[0]),
6548 minor(u->new_disks[0]));
6549 dprintf("imsm: new disk for reshape is: %i:%i "
6550 "(%p, index = %i)\n",
6551 major(u->new_disks[0]),
6552 minor(u->new_disks[0]),
6553 new_disk, new_disk->index);
6554 if (new_disk == NULL)
6555 goto error_disk_add;
6556
6557 new_disk->index = map->num_members - 1;
6558 /* slot to fill in autolayout
6559 */
6560 new_disk->raiddisk = new_disk->index;
6561 new_disk->disk.status |= CONFIGURED_DISK;
6562 new_disk->disk.status &= ~SPARE_DISK;
6563 } else
6564 goto error_disk_add;
6565
6566 skip_disk_add:
6567 *tofree = *space_list;
6568 /* calculate new size
6569 */
6570 imsm_set_array_size(new_dev);
6571
6572 ret_val = 1;
6573 }
6574 }
6575
6576 if (tofree)
6577 *space_list = tofree;
6578 return ret_val;
6579
6580 error_disk_add:
6581 dprintf("Error: imsm: Cannot find disk.\n");
6582 return ret_val;
6583 }
6584
6585
6586 static int apply_reshape_container_disks_update(struct imsm_update_reshape *u,
6587 struct intel_super *super,
6588 void ***space_list)
6589 {
6590 struct dl *new_disk;
6591 struct intel_dev *id;
6592 int i;
6593 int delta_disks = u->new_raid_disks - u->old_raid_disks;
6594 int disk_count = u->old_raid_disks;
6595 void **tofree = NULL;
6596 int devices_to_reshape = 1;
6597 struct imsm_super *mpb = super->anchor;
6598 int ret_val = 0;
6599 unsigned int dev_id;
6600
6601 dprintf("imsm: apply_reshape_container_disks_update()\n");
6602
6603 /* enable spares to use in array */
6604 for (i = 0; i < delta_disks; i++) {
6605 new_disk = get_disk_super(super,
6606 major(u->new_disks[i]),
6607 minor(u->new_disks[i]));
6608 dprintf("imsm: new disk for reshape is: %i:%i "
6609 "(%p, index = %i)\n",
6610 major(u->new_disks[i]), minor(u->new_disks[i]),
6611 new_disk, new_disk->index);
6612 if ((new_disk == NULL) ||
6613 ((new_disk->index >= 0) &&
6614 (new_disk->index < u->old_raid_disks)))
6615 goto update_reshape_exit;
6616 new_disk->index = disk_count++;
6617 /* slot to fill in autolayout
6618 */
6619 new_disk->raiddisk = new_disk->index;
6620 new_disk->disk.status |=
6621 CONFIGURED_DISK;
6622 new_disk->disk.status &= ~SPARE_DISK;
6623 }
6624
6625 dprintf("imsm: volume set mpb->num_raid_devs = %i\n",
6626 mpb->num_raid_devs);
6627 /* manage changes in volume
6628 */
6629 for (dev_id = 0; dev_id < mpb->num_raid_devs; dev_id++) {
6630 void **sp = *space_list;
6631 struct imsm_dev *newdev;
6632 struct imsm_map *newmap, *oldmap;
6633
6634 for (id = super->devlist ; id; id = id->next) {
6635 if (id->index == dev_id)
6636 break;
6637 }
6638 if (id == NULL)
6639 break;
6640 if (!sp)
6641 continue;
6642 *space_list = *sp;
6643 newdev = (void*)sp;
6644 /* Copy the dev, but not (all of) the map */
6645 memcpy(newdev, id->dev, sizeof(*newdev));
6646 oldmap = get_imsm_map(id->dev, 0);
6647 newmap = get_imsm_map(newdev, 0);
6648 /* Copy the current map */
6649 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6650 /* update one device only
6651 */
6652 if (devices_to_reshape) {
6653 dprintf("imsm: modifying subdev: %i\n",
6654 id->index);
6655 devices_to_reshape--;
6656 newdev->vol.migr_state = 1;
6657 newdev->vol.curr_migr_unit = 0;
6658 newdev->vol.migr_type = MIGR_GEN_MIGR;
6659 newmap->num_members = u->new_raid_disks;
6660 for (i = 0; i < delta_disks; i++) {
6661 set_imsm_ord_tbl_ent(newmap,
6662 u->old_raid_disks + i,
6663 u->old_raid_disks + i);
6664 }
6665 /* New map is correct, now need to save old map
6666 */
6667 newmap = get_imsm_map(newdev, 1);
6668 memcpy(newmap, oldmap, sizeof_imsm_map(oldmap));
6669
6670 imsm_set_array_size(newdev);
6671 }
6672
6673 sp = (void **)id->dev;
6674 id->dev = newdev;
6675 *sp = tofree;
6676 tofree = sp;
6677
6678 /* Clear migration record */
6679 memset(super->migr_rec, 0, sizeof(struct migr_record));
6680 }
6681 if (tofree)
6682 *space_list = tofree;
6683 ret_val = 1;
6684
6685 update_reshape_exit:
6686
6687 return ret_val;
6688 }
6689
/* Apply a level-takeover update (RAID10 <-> RAID0) to the in-memory imsm
 * metadata.  'u' identifies the subarray and direction; '*space_list' is a
 * linked list of pre-allocated buffers from imsm_prepare_update() (each
 * buffer's first pointer-sized word links to the next).
 * Returns 1 on success, 0 if the update cannot be applied (unknown subarray,
 * wrong failed-disk count, or allocation list exhausted).
 */
static int apply_takeover_update(struct imsm_update_takeover *u,
				 struct intel_super *super,
				 void ***space_list)
{
	struct imsm_dev *dev = NULL;
	struct intel_dev *dv;
	struct imsm_dev *dev_new;
	struct imsm_map *map;
	struct dl *dm, *du;
	int i;

	/* locate the subarray this takeover applies to */
	for (dv = super->devlist; dv; dv = dv->next)
		if (dv->index == (unsigned int)u->subarray) {
			dev = dv->dev;
			break;
		}

	if (dev == NULL)
		return 0;

	map = get_imsm_map(dev, 0);

	if (u->direction == R10_TO_R0) {
		/* Number of failed disks must be half of initial disk number */
		if (imsm_count_failed(super, dev) != (map->num_members / 2))
			return 0;

		/* iterate through devices to mark removed disks as spare */
		for (dm = super->disks; dm; dm = dm->next) {
			if (dm->disk.status & FAILED_DISK) {
				int idx = dm->index;
				/* update indexes on the disk list */
				/* FIXME this loop-with-the-loop looks wrong, I'm not convinced
				   the index values will end up being correct.... NB */
				for (du = super->disks; du; du = du->next)
					if (du->index > idx)
						du->index--;
				/* mark as spare disk */
				dm->disk.status = SPARE_DISK;
				dm->index = -1;
			}
		}
		/* update map: halve membership and drop to a plain RAID0 map */
		map->num_members = map->num_members / 2;
		map->map_state = IMSM_T_STATE_NORMAL;
		map->num_domains = 1;
		map->raid_level = 0;
		map->failed_disk_num = -1;
	}

	if (u->direction == R0_TO_R10) {
		void **space;
		/* update slots in current disk list: existing members occupy
		 * the even slots, mirrors will fill the odd ones */
		for (dm = super->disks; dm; dm = dm->next) {
			if (dm->index >= 0)
				dm->index *= 2;
		}
		/* create new *missing* disks (placeholder mirror halves) */
		for (i = 0; i < map->num_members; i++) {
			space = *space_list;
			if (!space)
				continue;
			/* pop one pre-allocated buffer off the space list */
			*space_list = *space;
			du = (void *)space;
			memcpy(du, super->disks, sizeof(*du));
			du->fd = -1;
			du->minor = 0;
			du->major = 0;
			du->index = (i * 2) + 1;
			sprintf((char *)du->disk.serial,
				" MISSING_%d", du->index);
			sprintf((char *)du->serial,
				"MISSING_%d", du->index);
			du->next = super->missing;
			super->missing = du;
		}
		/* create new dev and map */
		space = *space_list;
		if (!space)
			return 0;
		*space_list = *space;
		dev_new = (void *)space;
		memcpy(dev_new, dev, sizeof(*dev));
		/* update new map: doubled membership, degraded until the
		 * missing mirrors are rebuilt */
		map = get_imsm_map(dev_new, 0);
		map->num_members = map->num_members * 2;
		map->map_state = IMSM_T_STATE_DEGRADED;
		map->num_domains = 2;
		map->raid_level = 1;
		/* replace dev<->dev_new */
		dv->dev = dev_new;
	}
	/* update disk order table
	 * NOTE(review): dev_new is only assigned in the R0_TO_R10 branch; the
	 * mark_missing(dev_new, ...) call below appears to rely on
	 * super->missing only being populated by that branch — confirm no
	 * missing disks can be present on the R10_TO_R0 path.
	 */
	for (du = super->disks; du; du = du->next)
		if (du->index >= 0)
			set_imsm_ord_tbl_ent(map, du->index, du->index);
	for (du = super->missing; du; du = du->next)
		if (du->index >= 0) {
			set_imsm_ord_tbl_ent(map, du->index, du->index);
			mark_missing(dev_new, &du->disk, du->index);
		}

	return 1;
}
6794
/* Monitor-side handler: apply a metadata_update record (built by the manager
 * or by mdadm) to the in-memory imsm metadata.  Runs in mdmon's monitor
 * thread; all allocations it consumes come pre-made via update->space /
 * update->space_list (see imsm_prepare_update()).
 */
static void imsm_process_update(struct supertype *st,
			        struct metadata_update *update)
{
	/**
	 * crack open the metadata_update envelope to find the update record
	 * update can be one of:
	 * 	update_reshape_container_disks - all the arrays in the container
	 * 	are being reshaped to have more devices.  We need to mark
	 * 	the arrays for general migration and convert selected spares
	 * 	into active devices.
	 * 	update_activate_spare - a spare device has replaced a failed
	 * 	device in an array, update the disk_ord_tbl.  If this disk is
	 * 	present in all member arrays then also clear the SPARE_DISK
	 * 	flag
	 * 	update_create_array
	 * 	update_kill_array
	 * 	update_rename_array
	 * 	update_add_remove_disk
	 */
	struct intel_super *super = st->sb;
	struct imsm_super *mpb;
	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;

	/* update requires a larger buf but the allocation failed */
	if (super->next_len && !super->next_buf) {
		super->next_len = 0;
		return;
	}

	/* swap in the larger anchor buffer prepared by imsm_prepare_update() */
	if (super->next_buf) {
		memcpy(super->next_buf, super->buf, super->len);
		free(super->buf);
		super->len = super->next_len;
		super->buf = super->next_buf;

		super->next_len = 0;
		super->next_buf = NULL;
	}

	/* NB: anchor must be re-read after the potential buffer swap above */
	mpb = super->anchor;

	switch (type) {
	case update_takeover: {
		struct imsm_update_takeover *u = (void *)update->buf;
		if (apply_takeover_update(u, super, &update->space_list)) {
			imsm_update_version_info(super);
			super->updates_pending++;
		}
		break;
	}

	case update_reshape_container_disks: {
		struct imsm_update_reshape *u = (void *)update->buf;
		if (apply_reshape_container_disks_update(
			    u, super, &update->space_list))
			super->updates_pending++;
		break;
	}
	case update_reshape_migration: {
		struct imsm_update_reshape_migration *u = (void *)update->buf;
		if (apply_reshape_migration_update(
			    u, super, &update->space_list))
			super->updates_pending++;
		break;
	}
	case update_activate_spare: {
		struct imsm_update_activate_spare *u = (void *) update->buf;
		struct imsm_dev *dev = get_imsm_dev(super, u->array);
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct imsm_map *migr_map;
		struct active_array *a;
		struct imsm_disk *disk;
		__u8 to_state;
		struct dl *dl;
		unsigned int found;
		int failed;
		/* the disk currently occupying the target slot */
		int victim = get_imsm_disk_idx(dev, u->slot, -1);
		int i;

		/* sanity-check that the spare is on our disk list */
		for (dl = super->disks; dl; dl = dl->next)
			if (dl == u->dl)
				break;

		if (!dl) {
			fprintf(stderr, "error: imsm_activate_spare passed "
				"an unknown disk (index: %d)\n",
				u->dl->index);
			return;
		}

		super->updates_pending++;
		/* count failures (excluding rebuilds and the victim)
		 * to determine map[0] state
		 */
		failed = 0;
		for (i = 0; i < map->num_members; i++) {
			if (i == u->slot)
				continue;
			disk = get_imsm_disk(super,
					     get_imsm_disk_idx(dev, i, -1));
			if (!disk || is_failed(disk))
				failed++;
		}

		/* adding a pristine spare, assign a new index */
		if (dl->index < 0) {
			dl->index = super->anchor->num_disks;
			super->anchor->num_disks++;
		}
		disk = &dl->disk;
		disk->status |= CONFIGURED_DISK;
		disk->status &= ~SPARE_DISK;

		/* mark rebuild: start a MIGR_REBUILD migration; the second
		 * (migration) map carries the IMSM_ORD_REBUILD flag for the
		 * incoming spare */
		to_state = imsm_check_degraded(super, dev, failed);
		map->map_state = IMSM_T_STATE_DEGRADED;
		migrate(dev, super, to_state, MIGR_REBUILD);
		migr_map = get_imsm_map(dev, 1);
		set_imsm_ord_tbl_ent(map, u->slot, dl->index);
		set_imsm_ord_tbl_ent(migr_map, u->slot, dl->index | IMSM_ORD_REBUILD);

		/* update the family_num to mark a new container
		 * generation, being careful to record the existing
		 * family_num in orig_family_num to clean up after
		 * earlier mdadm versions that neglected to set it.
		 */
		if (mpb->orig_family_num == 0)
			mpb->orig_family_num = mpb->family_num;
		mpb->family_num += super->random;

		/* count arrays using the victim in the metadata */
		found = 0;
		for (a = st->arrays; a ; a = a->next) {
			dev = get_imsm_dev(super, a->info.container_member);
			map = get_imsm_map(dev, 0);

			if (get_imsm_disk_slot(map, victim) >= 0)
				found++;
		}

		/* delete the victim if it is no longer being
		 * utilized anywhere
		 */
		if (!found) {
			struct dl **dlp;

			/* We know that 'manager' isn't touching anything,
			 * so it is safe to delete
			 */
			for (dlp = &super->disks; *dlp; dlp = &(*dlp)->next)
				if ((*dlp)->index == victim)
					break;

			/* victim may be on the missing list */
			if (!*dlp)
				for (dlp = &super->missing; *dlp; dlp = &(*dlp)->next)
					if ((*dlp)->index == victim)
						break;
			imsm_delete(super, dlp, victim);
		}
		break;
	}
	case update_create_array: {
		/* someone wants to create a new array, we need to be aware of
		 * a few races/collisions:
		 * 1/ 'Create' called by two separate instances of mdadm
		 * 2/ 'Create' versus 'activate_spare': mdadm has chosen
		 *    devices that have since been assimilated via
		 *    activate_spare.
		 * In the event this update can not be carried out mdadm will
		 * (FIX ME) notice that its update did not take hold.
		 */
		struct imsm_update_create_array *u = (void *) update->buf;
		struct intel_dev *dv;
		struct imsm_dev *dev;
		struct imsm_map *map, *new_map;
		unsigned long long start, end;
		unsigned long long new_start, new_end;
		int i;
		struct disk_info *inf;
		struct dl *dl;

		/* handle racing creates: first come first serve */
		if (u->dev_idx < mpb->num_raid_devs) {
			dprintf("%s: subarray %d already defined\n",
				__func__, u->dev_idx);
			goto create_error;
		}

		/* check update is next in sequence */
		if (u->dev_idx != mpb->num_raid_devs) {
			dprintf("%s: can not create array %d expected index %d\n",
				__func__, u->dev_idx, mpb->num_raid_devs);
			goto create_error;
		}

		new_map = get_imsm_map(&u->dev, 0);
		new_start = __le32_to_cpu(new_map->pba_of_lba0);
		new_end = new_start + __le32_to_cpu(new_map->blocks_per_member);
		inf = get_disk_info(u);

		/* handle activate_spare versus create race:
		 * check to make sure that overlapping arrays do not include
		 * overalpping disks
		 */
		for (i = 0; i < mpb->num_raid_devs; i++) {
			dev = get_imsm_dev(super, i);
			map = get_imsm_map(dev, 0);
			start = __le32_to_cpu(map->pba_of_lba0);
			end = start + __le32_to_cpu(map->blocks_per_member);
			if ((new_start >= start && new_start <= end) ||
			    (start >= new_start && start <= new_end))
				/* overlap */;
			else
				continue;

			if (disks_overlap(super, i, u)) {
				dprintf("%s: arrays overlap\n", __func__);
				goto create_error;
			}
		}

		/* check that prepare update was successful */
		if (!update->space) {
			dprintf("%s: prepare update failed\n", __func__);
			goto create_error;
		}

		/* check that all disks are still active before committing
		 * changes.  FIXME: could we instead handle this by creating a
		 * degraded array?  That's probably not what the user expects,
		 * so better to drop this update on the floor.
		 */
		for (i = 0; i < new_map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (!dl) {
				dprintf("%s: disk disappeared\n", __func__);
				goto create_error;
			}
		}

		super->updates_pending++;

		/* convert spares to members and fixup ord_tbl */
		for (i = 0; i < new_map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (dl->index == -1) {
				dl->index = mpb->num_disks;
				mpb->num_disks++;
				dl->disk.status |= CONFIGURED_DISK;
				dl->disk.status &= ~SPARE_DISK;
			}
			set_imsm_ord_tbl_ent(new_map, i, dl->index);
		}

		/* take ownership of the pre-allocated intel_dev and link it
		 * into the device list */
		dv = update->space;
		dev = dv->dev;
		update->space = NULL;
		imsm_copy_dev(dev, &u->dev);
		dv->index = u->dev_idx;
		dv->next = super->devlist;
		super->devlist = dv;
		mpb->num_raid_devs++;

		imsm_update_version_info(super);
		break;
 create_error:
		/* mdmon knows how to release update->space, but not
		 * ((struct intel_dev *) update->space)->dev
		 */
		if (update->space) {
			dv = update->space;
			free(dv->dev);
		}
		break;
	}
	case update_kill_array: {
		struct imsm_update_kill_array *u = (void *) update->buf;
		int victim = u->dev_idx;
		struct active_array *a;
		struct intel_dev **dp;
		struct imsm_dev *dev;

		/* sanity check that we are not affecting the uuid of
		 * active arrays, or deleting an active array
		 *
		 * FIXME when immutable ids are available, but note that
		 * we'll also need to fixup the invalidated/active
		 * subarray indexes in mdstat
		 */
		for (a = st->arrays; a; a = a->next)
			if (a->info.container_member >= victim)
				break;
		/* by definition if mdmon is running at least one array
		 * is active in the container, so checking
		 * mpb->num_raid_devs is just extra paranoia
		 */
		dev = get_imsm_dev(super, victim);
		if (a || !dev || mpb->num_raid_devs == 1) {
			dprintf("failed to delete subarray-%d\n", victim);
			break;
		}

		/* unlink the victim and renumber the remaining subarrays.
		 * NOTE(review): the unlink test compares against
		 * super->current_vol rather than 'victim'; this appears to
		 * rely on the caller having set current_vol to the victim —
		 * confirm against the kill_subarray code path.
		 */
		for (dp = &super->devlist; *dp;)
			if ((*dp)->index == (unsigned)super->current_vol) {
				*dp = (*dp)->next;
			} else {
				if ((*dp)->index > (unsigned)victim)
					(*dp)->index--;
				dp = &(*dp)->next;
			}
		mpb->num_raid_devs--;
		super->updates_pending++;
		break;
	}
	case update_rename_array: {
		struct imsm_update_rename_array *u = (void *) update->buf;
		char name[MAX_RAID_SERIAL_LEN+1];
		int target = u->dev_idx;
		struct active_array *a;
		struct imsm_dev *dev;

		/* sanity check that we are not affecting the uuid of
		 * an active array
		 */
		snprintf(name, MAX_RAID_SERIAL_LEN, "%s", (char *) u->name);
		name[MAX_RAID_SERIAL_LEN] = '\0';
		for (a = st->arrays; a; a = a->next)
			if (a->info.container_member == target)
				break;
		dev = get_imsm_dev(super, u->dev_idx);
		if (a || !dev || !check_name(super, name, 1)) {
			dprintf("failed to rename subarray-%d\n", target);
			break;
		}

		snprintf((char *) dev->volume, MAX_RAID_SERIAL_LEN, "%s", name);
		super->updates_pending++;
		break;
	}
	case update_add_remove_disk: {
		/* we may be able to repair some arrays if disks are
		 * being added, check teh status of add_remove_disk
		 * if discs has been added.
		 */
		if (add_remove_disk_update(super)) {
			struct active_array *a;

			super->updates_pending++;
			/* re-evaluate degraded state of every member array */
			for (a = st->arrays; a; a = a->next)
				a->check_degraded = 1;
		}
		break;
	}
	default:
		fprintf(stderr, "error: unsuported process update type:"
			"(type: %d)\n", type);
	}
}
7154
7155 static struct mdinfo *get_spares_for_grow(struct supertype *st);
7156
static void imsm_prepare_update(struct supertype *st,
				struct metadata_update *update)
{
	/**
	 * Allocate space to hold new disk entries, raid-device entries or a new
	 * mpb if necessary.  The manager synchronously waits for updates to
	 * complete in the monitor, so new mpb buffers allocated here can be
	 * integrated by the monitor thread without worrying about live pointers
	 * in the manager thread.
	 */
	enum imsm_update_type type = *(enum imsm_update_type *) update->buf;
	struct intel_super *super = st->sb;
	struct imsm_super *mpb = super->anchor;
	size_t buf_len;
	/* extra anchor bytes this update will need; 0 means "no growth" */
	size_t len = 0;

	switch (type) {
	case update_takeover: {
		struct imsm_update_takeover *u = (void *)update->buf;
		if (u->direction == R0_TO_R10) {
			void **tail = (void **)&update->space_list;
			struct imsm_dev *dev = get_imsm_dev(super, u->subarray);
			struct imsm_map *map = get_imsm_map(dev, 0);
			int num_members = map->num_members;
			void *space;
			int size, i;
			int err = 0;
			/* allocate memory for added disks; each buffer is
			 * chained onto space_list via its first pointer-sized
			 * word (*tail = space; tail = space; *tail = NULL)
			 */
			for (i = 0; i < num_members; i++) {
				size = sizeof(struct dl);
				space = malloc(size);
				if (!space) {
					err++;
					break;
				}
				*tail = space;
				tail = space;
				*tail = NULL;
			}
			/* allocate memory for new device */
			size = sizeof_imsm_dev(super->devlist->dev, 0) +
				(num_members * sizeof(__u32));
			space = malloc(size);
			if (!space)
				err++;
			else {
				*tail = space;
				tail = space;
				*tail = NULL;
			}
			if (!err) {
				len = disks_to_mpb_size(num_members * 2);
			} else {
				/* if allocation didn't success, free buffer */
				while (update->space_list) {
					void **sp = update->space_list;
					update->space_list = *sp;
					free(sp);
				}
			}
		}

		break;
	}
	case update_reshape_container_disks: {
		/* Every raid device in the container is about to
		 * gain some more devices, and we will enter a
		 * reconfiguration.
		 * So each 'imsm_map' will be bigger, and the imsm_vol
		 * will now hold 2 of them.
		 * Thus we need new 'struct imsm_dev' allocations sized
		 * as sizeof_imsm_dev but with more devices in both maps.
		 */
		struct imsm_update_reshape *u = (void *)update->buf;
		struct intel_dev *dl;
		void **space_tail = (void**)&update->space_list;

		dprintf("imsm: imsm_prepare_update() for update_reshape\n");

		for (dl = super->devlist; dl; dl = dl->next) {
			/* '1' => room for both maps; 2 ord entries per added
			 * disk (one per map) */
			int size = sizeof_imsm_dev(dl->dev, 1);
			void *s;
			if (u->new_raid_disks > u->old_raid_disks)
				size += sizeof(__u32)*2*
					(u->new_raid_disks - u->old_raid_disks);
			s = malloc(size);
			if (!s)
				break;
			*space_tail = s;
			space_tail = s;
			*space_tail = NULL;
		}

		len = disks_to_mpb_size(u->new_raid_disks);
		dprintf("New anchor length is %llu\n", (unsigned long long)len);
		break;
	}
	case update_reshape_migration: {
		/* for migration level 0->5 we need to add disks
		 * so the same as for container operation we will copy
		 * device to the bigger location.
		 * in memory prepared device and new disk area are prepared
		 * for usage in process update
		 */
		struct imsm_update_reshape_migration *u = (void *)update->buf;
		struct intel_dev *id;
		void **space_tail = (void **)&update->space_list;
		int size;
		void *s;
		int current_level = -1;

		dprintf("imsm: imsm_prepare_update() for update_reshape\n");

		/* add space for bigger array in update
		 */
		for (id = super->devlist; id; id = id->next) {
			if (id->index == (unsigned)u->subdev) {
				size = sizeof_imsm_dev(id->dev, 1);
				if (u->new_raid_disks > u->old_raid_disks)
					size += sizeof(__u32)*2*
					(u->new_raid_disks - u->old_raid_disks);
				s = malloc(size);
				if (!s)
					break;
				*space_tail = s;
				space_tail = s;
				*space_tail = NULL;
				break;
			}
		}
		if (update->space_list == NULL)
			break;

		/* add space for disk in update
		 */
		size = sizeof(struct dl);
		s = malloc(size);
		if (!s) {
			free(update->space_list);
			update->space_list = NULL;
			break;
		}
		*space_tail = s;
		space_tail = s;
		*space_tail = NULL;

		/* add spare device to update
		 */
		for (id = super->devlist ; id; id = id->next)
			if (id->index == (unsigned)u->subdev) {
				struct imsm_dev *dev;
				struct imsm_map *map;

				dev = get_imsm_dev(super, u->subdev);
				map = get_imsm_map(dev, 0);
				current_level = map->raid_level;
				break;
			}
		if ((u->new_level == 5) && (u->new_level != current_level)) {
			struct mdinfo *spares;

			spares = get_spares_for_grow(st);
			if (spares) {
				struct dl *dl;
				struct mdinfo *dev;

				/* claim the first spare for the new parity
				 * disk; record it in the update record */
				dev = spares->devs;
				if (dev) {
					u->new_disks[0] =
						makedev(dev->disk.major,
							dev->disk.minor);
					dl = get_disk_super(super,
							    dev->disk.major,
							    dev->disk.minor);
					dl->index = u->old_raid_disks;
					dev = dev->next;
				}
				sysfs_free(spares);
			}
		}
		len = disks_to_mpb_size(u->new_raid_disks);
		dprintf("New anchor length is %llu\n", (unsigned long long)len);
		break;
	}
	case update_create_array: {
		struct imsm_update_create_array *u = (void *) update->buf;
		struct intel_dev *dv;
		struct imsm_dev *dev = &u->dev;
		struct imsm_map *map = get_imsm_map(dev, 0);
		struct dl *dl;
		struct disk_info *inf;
		int i;
		int activate = 0;

		inf = get_disk_info(u);
		len = sizeof_imsm_dev(dev, 1);
		/* allocate a new super->devlist entry */
		dv = malloc(sizeof(*dv));
		if (dv) {
			dv->dev = malloc(len);
			if (dv->dev)
				update->space = dv;
			else {
				free(dv);
				update->space = NULL;
			}
		}

		/* count how many spares will be converted to members */
		for (i = 0; i < map->num_members; i++) {
			dl = serial_to_dl(inf[i].serial, super);
			if (!dl) {
				/* hmm maybe it failed?, nothing we can do about
				 * it here
				 */
				continue;
			}
			if (count_memberships(dl, super) == 0)
				activate++;
		}
		len += activate * sizeof(struct imsm_disk);
		break;
	/* NOTE: this 'default' label sits inside the create_array case's
	 * compound statement.  That is legal C (case labels may appear
	 * anywhere within the switch body); the braces only scope the
	 * case's local variables.
	 */
	default:
		break;
	}
	}

	/* check if we need a larger metadata buffer */
	if (super->next_buf)
		buf_len = super->next_len;
	else
		buf_len = super->len;

	if (__le32_to_cpu(mpb->mpb_size) + len > buf_len) {
		/* ok we need a larger buf than what is currently allocated
		 * if this allocation fails process_update will notice that
		 * ->next_len is set and ->next_buf is NULL
		 */
		buf_len = ROUND_UP(__le32_to_cpu(mpb->mpb_size) + len, 512);
		if (super->next_buf)
			free(super->next_buf);

		super->next_len = buf_len;
		if (posix_memalign(&super->next_buf, 512, buf_len) == 0)
			memset(super->next_buf, 0, buf_len);
		else
			super->next_buf = NULL;
	}
}
7406
/* must be called while manager is quiesced */
/* Remove the disk at 'index' from the imsm metadata: shift every higher
 * disk index down by one, fix up each subarray's disk order table, and
 * unlink/free the dl entry at *dlp (if any).
 */
static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned index)
{
	struct imsm_super *mpb = super->anchor;
	struct dl *iter;
	struct imsm_dev *dev;
	struct imsm_map *map;
	int i, j, num_members;
	__u32 ord;

	dprintf("%s: deleting device[%d] from imsm_super\n",
		__func__, index);

	/* shift all indexes down one */
	for (iter = super->disks; iter; iter = iter->next)
		if (iter->index > (int)index)
			iter->index--;
	for (iter = super->missing; iter; iter = iter->next)
		if (iter->index > (int)index)
			iter->index--;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		dev = get_imsm_dev(super, i);
		map = get_imsm_map(dev, 0);
		num_members = map->num_members;
		for (j = 0; j < num_members; j++) {
			/* update ord entries being careful not to propagate
			 * ord-flags to the first map
			 */
			ord = get_imsm_ord_tbl_ent(dev, j, -1);

			/* entries referencing disks at or below 'index' keep
			 * their numbering */
			if (ord_to_idx(ord) <= index)
				continue;

			/* first map gets the bare decremented index (flags
			 * stripped); the migration map, if present, keeps the
			 * flag bits via plain 'ord - 1' (ord_to_idx(ord) > 0
			 * here, so the subtraction cannot borrow into flags) */
			map = get_imsm_map(dev, 0);
			set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
			map = get_imsm_map(dev, 1);
			if (map)
				set_imsm_ord_tbl_ent(map, j, ord - 1);
		}
	}

	mpb->num_disks--;
	super->updates_pending++;
	if (*dlp) {
		struct dl *dl = *dlp;

		*dlp = (*dlp)->next;
		__free_imsm_disk(dl);
	}
}
7458
7459 /*******************************************************************************
7460 * Function: open_backup_targets
7461 * Description: Function opens file descriptors for all devices given in
7462 * info->devs
7463 * Parameters:
7464 * info : general array info
7465 * raid_disks : number of disks
7466 * raid_fds : table of device's file descriptors
7467 * Returns:
7468 * 0 : success
7469 * -1 : fail
7470 ******************************************************************************/
7471 int open_backup_targets(struct mdinfo *info, int raid_disks, int *raid_fds)
7472 {
7473 struct mdinfo *sd;
7474
7475 for (sd = info->devs ; sd ; sd = sd->next) {
7476 char *dn;
7477
7478 if (sd->disk.state & (1<<MD_DISK_FAULTY)) {
7479 dprintf("disk is faulty!!\n");
7480 continue;
7481 }
7482
7483 if ((sd->disk.raid_disk >= raid_disks) ||
7484 (sd->disk.raid_disk < 0))
7485 continue;
7486
7487 dn = map_dev(sd->disk.major,
7488 sd->disk.minor, 1);
7489 raid_fds[sd->disk.raid_disk] = dev_open(dn, O_RDWR);
7490 if (raid_fds[sd->disk.raid_disk] < 0) {
7491 fprintf(stderr, "cannot open component\n");
7492 return -1;
7493 }
7494 }
7495 return 0;
7496 }
7497
/*******************************************************************************
 * Function: init_migr_record_imsm
 * Description: Function inits imsm migration record
 * Parameters:
 *	super : imsm internal array info
 *	dev : device under migration
 *	info : general array info to find the smallest device
 * Returns:
 *	none
 ******************************************************************************/
void init_migr_record_imsm(struct supertype *st, struct imsm_dev *dev,
			   struct mdinfo *info)
{
	struct intel_super *super = st->sb;
	struct migr_record *migr_rec = super->migr_rec;
	int new_data_disks;
	unsigned long long dsize, dev_sectors;
	/* start at the maximum so any real device size becomes the minimum */
	long long unsigned min_dev_sectors = -1LLU;
	struct mdinfo *sd;
	char nm[30];
	int fd;
	struct imsm_map *map_dest = get_imsm_map(dev, 0);
	struct imsm_map *map_src = get_imsm_map(dev, 1);
	unsigned long long num_migr_units;

	/* total volume size in blocks, assembled from the split hi/lo words */
	unsigned long long array_blocks =
		(((unsigned long long)__le32_to_cpu(dev->size_high)) << 32) +
		__le32_to_cpu(dev->size_low);

	memset(migr_rec, 0, sizeof(struct migr_record));
	migr_rec->family_num = __cpu_to_le32(super->anchor->family_num);

	/* only ascending reshape supported now */
	migr_rec->ascending_migr = __cpu_to_le32(1);

	/* depth per unit: how many whole destination strips fit in the copy
	 * area, expressed in blocks (computed in CPU order first, converted
	 * to little-endian after blocks_per_unit is derived from it) */
	migr_rec->dest_depth_per_unit = GEN_MIGR_AREA_SIZE /
		max(map_dest->blocks_per_strip, map_src->blocks_per_strip);
	migr_rec->dest_depth_per_unit *= map_dest->blocks_per_strip;
	new_data_disks = imsm_num_data_members(dev, 0);
	migr_rec->blocks_per_unit =
		__cpu_to_le32(migr_rec->dest_depth_per_unit * new_data_disks);
	migr_rec->dest_depth_per_unit =
		__cpu_to_le32(migr_rec->dest_depth_per_unit);

	/* number of units, rounded up to cover a partial final unit */
	num_migr_units =
		array_blocks / __le32_to_cpu(migr_rec->blocks_per_unit);

	if (array_blocks % __le32_to_cpu(migr_rec->blocks_per_unit))
		num_migr_units++;
	migr_rec->num_migr_units = __cpu_to_le32(num_migr_units);

	/* dev->size_low/size_high are already little-endian on-disk fields,
	 * so they are copied without conversion */
	migr_rec->post_migr_vol_cap =  dev->size_low;
	migr_rec->post_migr_vol_cap_hi = dev->size_high;


	/* Find the smallest dev */
	for (sd = info->devs ; sd ; sd = sd->next) {
		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
		fd = dev_open(nm, O_RDONLY);
		if (fd < 0)
			continue;
		get_dev_size(fd, NULL, &dsize);
		dev_sectors = dsize / 512;
		if (dev_sectors < min_dev_sectors)
			min_dev_sectors = dev_sectors;
		close(fd);
	}
	/* place the checkpoint area at the end of the smallest member,
	 * inside the reserved region */
	migr_rec->ckpt_area_pba = __cpu_to_le32(min_dev_sectors -
					RAID_DISK_RESERVED_BLOCKS_IMSM_HI);

	/* NOTE(review): write_imsm_migr_rec() failure is not reported here
	 * (the function returns void) — callers get no error indication.
	 */
	write_imsm_migr_rec(st);

	return;
}
7572
7573 /*******************************************************************************
7574 * Function: save_backup_imsm
7575 * Description: Function saves critical data stripes to Migration Copy Area
7576 * and updates the current migration unit status.
7577 * Use restore_stripes() to form a destination stripe,
7578 * and to write it to the Copy Area.
7579 * Parameters:
7580 * st : supertype information
7581 * info : general array info
7582 * buf : input buffer
7583 * write_offset : address of data to backup
7584 * length : length of data to backup (blocks_per_unit)
7585 * Returns:
7586 * 0 : success
7587 *, -1 : fail
7588 ******************************************************************************/
7589 int save_backup_imsm(struct supertype *st,
7590 struct imsm_dev *dev,
7591 struct mdinfo *info,
7592 void *buf,
7593 int new_data,
7594 int length)
7595 {
7596 int rv = -1;
7597 struct intel_super *super = st->sb;
7598 unsigned long long *target_offsets = NULL;
7599 int *targets = NULL;
7600 int i;
7601 struct imsm_map *map_dest = get_imsm_map(dev, 0);
7602 int new_disks = map_dest->num_members;
7603
7604 targets = malloc(new_disks * sizeof(int));
7605 if (!targets)
7606 goto abort;
7607
7608 target_offsets = malloc(new_disks * sizeof(unsigned long long));
7609 if (!target_offsets)
7610 goto abort;
7611
7612 for (i = 0; i < new_disks; i++) {
7613 targets[i] = -1;
7614 target_offsets[i] = (unsigned long long)
7615 __le32_to_cpu(super->migr_rec->ckpt_area_pba) * 512;
7616 }
7617
7618 if (open_backup_targets(info, new_disks, targets))
7619 goto abort;
7620
7621 if (restore_stripes(targets, /* list of dest devices */
7622 target_offsets, /* migration record offsets */
7623 new_disks,
7624 info->new_chunk,
7625 info->new_level,
7626 info->new_layout,
7627 -1, /* source backup file descriptor */
7628 0, /* input buf offset
7629 * always 0 buf is already offset */
7630 0,
7631 length,
7632 buf) != 0) {
7633 fprintf(stderr, Name ": Error restoring stripes\n");
7634 goto abort;
7635 }
7636
7637 rv = 0;
7638
7639 abort:
7640 if (targets) {
7641 for (i = 0; i < new_disks; i++)
7642 if (targets[i] >= 0)
7643 close(targets[i]);
7644 free(targets);
7645 }
7646 free(target_offsets);
7647
7648 return rv;
7649 }
7650
7651 /*******************************************************************************
7652 * Function: save_checkpoint_imsm
7653 * Description: Function called for current unit status update
7654 * in the migration record. It writes it to disk.
7655 * Parameters:
7656 * super : imsm internal array info
7657 * info : general array info
7658 * Returns:
7659 * 0: success
7660 * 1: failure
7661 ******************************************************************************/
7662 int save_checkpoint_imsm(struct supertype *st, struct mdinfo *info, int state)
7663 {
7664 struct intel_super *super = st->sb;
7665 load_imsm_migr_rec(super, info);
7666 if (__le32_to_cpu(super->migr_rec->blocks_per_unit) == 0) {
7667 dprintf("ERROR: blocks_per_unit = 0!!!\n");
7668 return 1;
7669 }
7670
7671 super->migr_rec->curr_migr_unit =
7672 __cpu_to_le32(info->reshape_progress /
7673 __le32_to_cpu(super->migr_rec->blocks_per_unit));
7674 super->migr_rec->rec_status = __cpu_to_le32(state);
7675 super->migr_rec->dest_1st_member_lba =
7676 __cpu_to_le32((__le32_to_cpu(super->migr_rec->curr_migr_unit))
7677 * __le32_to_cpu(super->migr_rec->dest_depth_per_unit));
7678 if (write_imsm_migr_rec(st) < 0) {
7679 dprintf("imsm: Cannot write migration record "
7680 "outside backup area\n");
7681 return 1;
7682 }
7683
7684 return 0;
7685 }
7686
7687 static __u64 blocks_per_migr_unit(struct intel_super *super,
7688 struct imsm_dev *dev);
7689
7690 /*******************************************************************************
7691 * Function: recover_backup_imsm
7692 * Description: Function recovers critical data from the Migration Copy Area
7693 * while assembling an array.
7694 * Parameters:
7695 * super : imsm internal array info
7696 * info : general array info
7697 * Returns:
7698 * 0 : success (or there is no data to recover)
7699 * 1 : fail
7700 ******************************************************************************/
7701 int recover_backup_imsm(struct supertype *st, struct mdinfo *info)
7702 {
7703 struct intel_super *super = st->sb;
7704 struct migr_record *migr_rec = super->migr_rec;
7705 struct imsm_map *map_dest = NULL;
7706 struct intel_dev *id = NULL;
7707 unsigned long long read_offset;
7708 unsigned long long write_offset;
7709 unsigned unit_len;
7710 int *targets = NULL;
7711 int new_disks, i, err;
7712 char *buf = NULL;
7713 int retval = 1;
7714 unsigned long curr_migr_unit = __le32_to_cpu(migr_rec->curr_migr_unit);
7715 unsigned long num_migr_units = __le32_to_cpu(migr_rec->num_migr_units);
7716 int ascending = __le32_to_cpu(migr_rec->ascending_migr);
7717 char buffer[20];
7718
7719 err = sysfs_get_str(info, NULL, "array_state", (char *)buffer, 20);
7720 if (err < 1)
7721 return 1;
7722
7723 /* recover data only during assemblation */
7724 if (strncmp(buffer, "inactive", 8) != 0)
7725 return 0;
7726 /* no data to recover */
7727 if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
7728 return 0;
7729 if (curr_migr_unit >= num_migr_units)
7730 return 1;
7731
7732 /* find device during reshape */
7733 for (id = super->devlist; id; id = id->next)
7734 if (is_gen_migration(id->dev))
7735 break;
7736 if (id == NULL)
7737 return 1;
7738
7739 map_dest = get_imsm_map(id->dev, 0);
7740 new_disks = map_dest->num_members;
7741
7742 read_offset = (unsigned long long)
7743 __le32_to_cpu(migr_rec->ckpt_area_pba) * 512;
7744
7745 write_offset = ((unsigned long long)
7746 __le32_to_cpu(migr_rec->dest_1st_member_lba) +
7747 info->data_offset) * 512;
7748
7749 unit_len = __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
7750 if (posix_memalign((void **)&buf, 512, unit_len) != 0)
7751 goto abort;
7752 targets = malloc(new_disks * sizeof(int));
7753 if (!targets)
7754 goto abort;
7755
7756 open_backup_targets(info, new_disks, targets);
7757
7758 for (i = 0; i < new_disks; i++) {
7759 if (lseek64(targets[i], read_offset, SEEK_SET) < 0) {
7760 fprintf(stderr,
7761 Name ": Cannot seek to block: %s\n",
7762 strerror(errno));
7763 goto abort;
7764 }
7765 if (read(targets[i], buf, unit_len) != unit_len) {
7766 fprintf(stderr,
7767 Name ": Cannot read copy area block: %s\n",
7768 strerror(errno));
7769 goto abort;
7770 }
7771 if (lseek64(targets[i], write_offset, SEEK_SET) < 0) {
7772 fprintf(stderr,
7773 Name ": Cannot seek to block: %s\n",
7774 strerror(errno));
7775 goto abort;
7776 }
7777 if (write(targets[i], buf, unit_len) != unit_len) {
7778 fprintf(stderr,
7779 Name ": Cannot restore block: %s\n",
7780 strerror(errno));
7781 goto abort;
7782 }
7783 }
7784
7785 if (ascending && curr_migr_unit < (num_migr_units-1))
7786 curr_migr_unit++;
7787
7788 migr_rec->curr_migr_unit = __le32_to_cpu(curr_migr_unit);
7789 super->migr_rec->rec_status = __cpu_to_le32(UNIT_SRC_NORMAL);
7790 if (write_imsm_migr_rec(st) == 0) {
7791 __u64 blocks_per_unit = blocks_per_migr_unit(super, id->dev);
7792 info->reshape_progress = curr_migr_unit * blocks_per_unit;
7793 retval = 0;
7794 }
7795
7796 abort:
7797 if (targets) {
7798 for (i = 0; i < new_disks; i++)
7799 if (targets[i])
7800 close(targets[i]);
7801 free(targets);
7802 }
7803 free(buf);
7804 return retval;
7805 }
7806
7807 static char disk_by_path[] = "/dev/disk/by-path/";
7808
7809 static const char *imsm_get_disk_controller_domain(const char *path)
7810 {
7811 char disk_path[PATH_MAX];
7812 char *drv=NULL;
7813 struct stat st;
7814
7815 strncpy(disk_path, disk_by_path, PATH_MAX - 1);
7816 strncat(disk_path, path, PATH_MAX - strlen(disk_path) - 1);
7817 if (stat(disk_path, &st) == 0) {
7818 struct sys_dev* hba;
7819 char *path=NULL;
7820
7821 path = devt_to_devpath(st.st_rdev);
7822 if (path == NULL)
7823 return "unknown";
7824 hba = find_disk_attached_hba(-1, path);
7825 if (hba && hba->type == SYS_DEV_SAS)
7826 drv = "isci";
7827 else if (hba && hba->type == SYS_DEV_SATA)
7828 drv = "ahci";
7829 else
7830 drv = "unknown";
7831 dprintf("path: %s hba: %s attached: %s\n",
7832 path, (hba) ? hba->path : "NULL", drv);
7833 free(path);
7834 if (hba)
7835 free_sys_dev(&hba);
7836 }
7837 return drv;
7838 }
7839
7840 static int imsm_find_array_minor_by_subdev(int subdev, int container, int *minor)
7841 {
7842 char subdev_name[20];
7843 struct mdstat_ent *mdstat;
7844
7845 sprintf(subdev_name, "%d", subdev);
7846 mdstat = mdstat_by_subdev(subdev_name, container);
7847 if (!mdstat)
7848 return -1;
7849
7850 *minor = mdstat->devnum;
7851 free_mdstat(mdstat);
7852 return 0;
7853 }
7854
7855 static int imsm_reshape_is_allowed_on_container(struct supertype *st,
7856 struct geo_params *geo,
7857 int *old_raid_disks)
7858 {
7859 /* currently we only support increasing the number of devices
7860 * for a container. This increases the number of device for each
7861 * member array. They must all be RAID0 or RAID5.
7862 */
7863 int ret_val = 0;
7864 struct mdinfo *info, *member;
7865 int devices_that_can_grow = 0;
7866
7867 dprintf("imsm: imsm_reshape_is_allowed_on_container(ENTER): "
7868 "st->devnum = (%i)\n",
7869 st->devnum);
7870
7871 if (geo->size != -1 ||
7872 geo->level != UnSet ||
7873 geo->layout != UnSet ||
7874 geo->chunksize != 0 ||
7875 geo->raid_disks == UnSet) {
7876 dprintf("imsm: Container operation is allowed for "
7877 "raid disks number change only.\n");
7878 return ret_val;
7879 }
7880
7881 info = container_content_imsm(st, NULL);
7882 for (member = info; member; member = member->next) {
7883 int result;
7884 int minor;
7885
7886 dprintf("imsm: checking device_num: %i\n",
7887 member->container_member);
7888
7889 if (geo->raid_disks <= member->array.raid_disks) {
7890 /* we work on container for Online Capacity Expansion
7891 * only so raid_disks has to grow
7892 */
7893 dprintf("imsm: for container operation raid disks "
7894 "increase is required\n");
7895 break;
7896 }
7897
7898 if ((info->array.level != 0) &&
7899 (info->array.level != 5)) {
7900 /* we cannot use this container with other raid level
7901 */
7902 dprintf("imsm: for container operation wrong"
7903 " raid level (%i) detected\n",
7904 info->array.level);
7905 break;
7906 } else {
7907 /* check for platform support
7908 * for this raid level configuration
7909 */
7910 struct intel_super *super = st->sb;
7911 if (!is_raid_level_supported(super->orom,
7912 member->array.level,
7913 geo->raid_disks)) {
7914 dprintf("platform does not support raid%d with"
7915 " %d disk%s\n",
7916 info->array.level,
7917 geo->raid_disks,
7918 geo->raid_disks > 1 ? "s" : "");
7919 break;
7920 }
7921 /* check if component size is aligned to chunk size
7922 */
7923 if (info->component_size %
7924 (info->array.chunk_size/512)) {
7925 dprintf("Component size is not aligned to "
7926 "chunk size\n");
7927 break;
7928 }
7929 }
7930
7931 if (*old_raid_disks &&
7932 info->array.raid_disks != *old_raid_disks)
7933 break;
7934 *old_raid_disks = info->array.raid_disks;
7935
7936 /* All raid5 and raid0 volumes in container
7937 * have to be ready for Online Capacity Expansion
7938 * so they need to be assembled. We have already
7939 * checked that no recovery etc is happening.
7940 */
7941 result = imsm_find_array_minor_by_subdev(member->container_member,
7942 st->container_dev,
7943 &minor);
7944 if (result < 0) {
7945 dprintf("imsm: cannot find array\n");
7946 break;
7947 }
7948 devices_that_can_grow++;
7949 }
7950 sysfs_free(info);
7951 if (!member && devices_that_can_grow)
7952 ret_val = 1;
7953
7954 if (ret_val)
7955 dprintf("\tContainer operation allowed\n");
7956 else
7957 dprintf("\tError: %i\n", ret_val);
7958
7959 return ret_val;
7960 }
7961
7962 /* Function: get_spares_for_grow
7963 * Description: Allocates memory and creates list of spare devices
* available in container. Checks if spare drive size is acceptable.
7965 * Parameters: Pointer to the supertype structure
7966 * Returns: Pointer to the list of spare devices (mdinfo structure) on success,
7967 * NULL if fail
7968 */
7969 static struct mdinfo *get_spares_for_grow(struct supertype *st)
7970 {
7971 unsigned long long min_size = min_acceptable_spare_size_imsm(st);
7972 return container_choose_spares(st, min_size, NULL, NULL, NULL, 0);
7973 }
7974
7975 /******************************************************************************
7976 * function: imsm_create_metadata_update_for_reshape
7977 * Function creates update for whole IMSM container.
7978 *
7979 ******************************************************************************/
7980 static int imsm_create_metadata_update_for_reshape(
7981 struct supertype *st,
7982 struct geo_params *geo,
7983 int old_raid_disks,
7984 struct imsm_update_reshape **updatep)
7985 {
7986 struct intel_super *super = st->sb;
7987 struct imsm_super *mpb = super->anchor;
7988 int update_memory_size = 0;
7989 struct imsm_update_reshape *u = NULL;
7990 struct mdinfo *spares = NULL;
7991 int i;
7992 int delta_disks = 0;
7993 struct mdinfo *dev;
7994
7995 dprintf("imsm_update_metadata_for_reshape(enter) raid_disks = %i\n",
7996 geo->raid_disks);
7997
7998 delta_disks = geo->raid_disks - old_raid_disks;
7999
8000 /* size of all update data without anchor */
8001 update_memory_size = sizeof(struct imsm_update_reshape);
8002
8003 /* now add space for spare disks that we need to add. */
8004 update_memory_size += sizeof(u->new_disks[0]) * (delta_disks - 1);
8005
8006 u = calloc(1, update_memory_size);
8007 if (u == NULL) {
8008 dprintf("error: "
8009 "cannot get memory for imsm_update_reshape update\n");
8010 return 0;
8011 }
8012 u->type = update_reshape_container_disks;
8013 u->old_raid_disks = old_raid_disks;
8014 u->new_raid_disks = geo->raid_disks;
8015
8016 /* now get spare disks list
8017 */
8018 spares = get_spares_for_grow(st);
8019
8020 if (spares == NULL
8021 || delta_disks > spares->array.spare_disks) {
8022 fprintf(stderr, Name ": imsm: ERROR: Cannot get spare devices "
8023 "for %s.\n", geo->dev_name);
8024 goto abort;
8025 }
8026
8027 /* we have got spares
8028 * update disk list in imsm_disk list table in anchor
8029 */
8030 dprintf("imsm: %i spares are available.\n\n",
8031 spares->array.spare_disks);
8032
8033 dev = spares->devs;
8034 for (i = 0; i < delta_disks; i++) {
8035 struct dl *dl;
8036
8037 if (dev == NULL)
8038 break;
8039 u->new_disks[i] = makedev(dev->disk.major,
8040 dev->disk.minor);
8041 dl = get_disk_super(super, dev->disk.major, dev->disk.minor);
8042 dl->index = mpb->num_disks;
8043 mpb->num_disks++;
8044 dev = dev->next;
8045 }
8046
8047 abort:
8048 /* free spares
8049 */
8050 sysfs_free(spares);
8051
8052 dprintf("imsm: reshape update preparation :");
8053 if (i == delta_disks) {
8054 dprintf(" OK\n");
8055 *updatep = u;
8056 return update_memory_size;
8057 }
8058 free(u);
8059 dprintf(" Error\n");
8060
8061 return 0;
8062 }
8063
8064 /******************************************************************************
8065 * function: imsm_create_metadata_update_for_migration()
8066 * Creates update for IMSM array.
8067 *
8068 ******************************************************************************/
8069 static int imsm_create_metadata_update_for_migration(
8070 struct supertype *st,
8071 struct geo_params *geo,
8072 struct imsm_update_reshape_migration **updatep)
8073 {
8074 struct intel_super *super = st->sb;
8075 int update_memory_size = 0;
8076 struct imsm_update_reshape_migration *u = NULL;
8077 struct imsm_dev *dev;
8078 int previous_level = -1;
8079
8080 dprintf("imsm_create_metadata_update_for_migration(enter)"
8081 " New Level = %i\n", geo->level);
8082
8083 /* size of all update data without anchor */
8084 update_memory_size = sizeof(struct imsm_update_reshape_migration);
8085
8086 u = calloc(1, update_memory_size);
8087 if (u == NULL) {
8088 dprintf("error: cannot get memory for "
8089 "imsm_create_metadata_update_for_migration\n");
8090 return 0;
8091 }
8092 u->type = update_reshape_migration;
8093 u->subdev = super->current_vol;
8094 u->new_level = geo->level;
8095 u->new_layout = geo->layout;
8096 u->new_raid_disks = u->old_raid_disks = geo->raid_disks;
8097 u->new_disks[0] = -1;
8098 u->new_chunksize = -1;
8099
8100 dev = get_imsm_dev(super, u->subdev);
8101 if (dev) {
8102 struct imsm_map *map;
8103
8104 map = get_imsm_map(dev, 0);
8105 if (map) {
8106 int current_chunk_size =
8107 __le16_to_cpu(map->blocks_per_strip) / 2;
8108
8109 if (geo->chunksize != current_chunk_size) {
8110 u->new_chunksize = geo->chunksize / 1024;
8111 dprintf("imsm: "
8112 "chunk size change from %i to %i\n",
8113 current_chunk_size, u->new_chunksize);
8114 }
8115 previous_level = map->raid_level;
8116 }
8117 }
8118 if ((geo->level == 5) && (previous_level == 0)) {
8119 struct mdinfo *spares = NULL;
8120
8121 u->new_raid_disks++;
8122 spares = get_spares_for_grow(st);
8123 if ((spares == NULL) || (spares->array.spare_disks < 1)) {
8124 free(u);
8125 sysfs_free(spares);
8126 update_memory_size = 0;
8127 dprintf("error: cannot get spare device "
8128 "for requested migration");
8129 return 0;
8130 }
8131 sysfs_free(spares);
8132 }
8133 dprintf("imsm: reshape update preparation : OK\n");
8134 *updatep = u;
8135
8136 return update_memory_size;
8137 }
8138
8139 static void imsm_update_metadata_locally(struct supertype *st,
8140 void *buf, int len)
8141 {
8142 struct metadata_update mu;
8143
8144 mu.buf = buf;
8145 mu.len = len;
8146 mu.space = NULL;
8147 mu.space_list = NULL;
8148 mu.next = NULL;
8149 imsm_prepare_update(st, &mu);
8150 imsm_process_update(st, &mu);
8151
8152 while (mu.space_list) {
8153 void **space = mu.space_list;
8154 mu.space_list = *space;
8155 free(space);
8156 }
8157 }
8158
8159 /***************************************************************************
8160 * Function: imsm_analyze_change
8161 * Description: Function analyze change for single volume
8162 * and validate if transition is supported
8163 * Parameters: Geometry parameters, supertype structure
8164 * Returns: Operation type code on success, -1 if fail
8165 ****************************************************************************/
enum imsm_reshape_type imsm_analyze_change(struct supertype *st,
					   struct geo_params *geo)
{
	struct mdinfo info;
	int change = -1;	/* -1 == no supported transition found */
	int check_devs = 0;	/* set when the container must hold one volume */
	int chunk;

	/* current geometry of the selected volume */
	getinfo_super_imsm_volume(st, &info, NULL);

	/* level change requested? Unset/negative means "keep current". */
	if ((geo->level != info.array.level) &&
	    (geo->level >= 0) &&
	    (geo->level != UnSet)) {
		switch (info.array.level) {
		case 0:
			if (geo->level == 5) {
				/* raid0 -> raid5 handled as migration */
				change = CH_MIGRATION;
				check_devs = 1;
			}
			if (geo->level == 10) {
				/* raid0 -> raid10 handled as takeover */
				change = CH_TAKEOVER;
				check_devs = 1;
			}
			break;
		case 1:
			if (geo->level == 0) {
				change = CH_TAKEOVER;
				check_devs = 1;
			}
			break;
		case 10:
			if (geo->level == 0) {
				change = CH_TAKEOVER;
				check_devs = 1;
			}
			break;
		}
		if (change == -1) {
			fprintf(stderr,
				Name " Error. Level Migration from %d to %d "
				"not supported!\n",
				info.array.level, geo->level);
			goto analyse_change_exit;
		}
	} else
		geo->level = info.array.level;

	/* layout change requested? Only the raid5 4<->5 layout pair is
	 * accepted; anything else aborts the analysis.
	 */
	if ((geo->layout != info.array.layout)
	    && ((geo->layout != UnSet) && (geo->layout != -1))) {
		change = CH_MIGRATION;
		if ((info.array.layout == 0)
		    && (info.array.level == 5)
		    && (geo->layout == 5)) {
			/* reshape 5 -> 4 */
		} else if ((info.array.layout == 5)
			   && (info.array.level == 5)
			   && (geo->layout == 0)) {
			/* reshape 4 -> 5 */
			geo->layout = 0;
			geo->level = 5;
		} else {
			fprintf(stderr,
				Name " Error. Layout Migration from %d to %d "
				"not supported!\n",
				info.array.layout, geo->layout);
			change = -1;
			goto analyse_change_exit;
		}
	} else
		geo->layout = info.array.layout;

	/* chunk size change is also carried out as a migration */
	if ((geo->chunksize > 0) && (geo->chunksize != UnSet)
	    && (geo->chunksize != info.array.chunk_size))
		change = CH_MIGRATION;
	else
		geo->chunksize = info.array.chunk_size;

	chunk = geo->chunksize / 1024;
	/* reject the transition when the target geometry is not supported
	 * by the platform (OROM restrictions etc.)
	 */
	if (!validate_geometry_imsm(st,
				    geo->level,
				    geo->layout,
				    geo->raid_disks,
				    &chunk,
				    geo->size,
				    0, 0, 1))
		change = -1;

	if (check_devs) {
		struct intel_super *super = st->sb;
		struct imsm_super *mpb = super->anchor;

		/* level-changing operations require the container to hold
		 * exactly one volume
		 */
		if (mpb->num_raid_devs > 1) {
			fprintf(stderr,
				Name " Error. Cannot perform operation on %s"
				"- for this operation it MUST be single "
				"array in container\n",
				geo->dev_name);
			change = -1;
		}
	}

analyse_change_exit:

	return change;
}
8271
8272 int imsm_takeover(struct supertype *st, struct geo_params *geo)
8273 {
8274 struct intel_super *super = st->sb;
8275 struct imsm_update_takeover *u;
8276
8277 u = malloc(sizeof(struct imsm_update_takeover));
8278 if (u == NULL)
8279 return 1;
8280
8281 u->type = update_takeover;
8282 u->subarray = super->current_vol;
8283
8284 /* 10->0 transition */
8285 if (geo->level == 0)
8286 u->direction = R10_TO_R0;
8287
8288 /* 0->10 transition */
8289 if (geo->level == 10)
8290 u->direction = R0_TO_R10;
8291
8292 /* update metadata locally */
8293 imsm_update_metadata_locally(st, u,
8294 sizeof(struct imsm_update_takeover));
8295 /* and possibly remotely */
8296 if (st->update_tail)
8297 append_metadata_update(st, u,
8298 sizeof(struct imsm_update_takeover));
8299 else
8300 free(u);
8301
8302 return 0;
8303 }
8304
/* Print the experimental-feature warning and ask the user for confirmation.
 * Returns the (non-zero == yes) answer from ask().
 */
static int warn_user_about_risk(void)
{
	int answer;

	fprintf(stderr,
		"\nThis is an experimental feature. Data on the RAID volume(s) "
		"can be lost!!!\n\n"
		"To continue command execution please make sure that\n"
		"the grow process will not be interrupted. Use safe power\n"
		"supply to avoid unexpected system reboot. Make sure that\n"
		"reshaped container is not assembled automatically during\n"
		"system boot.\n"
		"If reshape is interrupted, assemble array manually\n"
		"using e.g. '-Ac' option and up to date mdadm.conf file.\n"
		"Assembly in scan mode is not possible in such case.\n"
		"Growing container with boot array is not possible.\n"
		"If boot array reshape is interrupted, whole file system\n"
		"can be lost.\n\n");
	answer = ask("Do you want to continue? ");
	fprintf(stderr, "\n");

	return answer;
}
8328
8329 static int imsm_reshape_super(struct supertype *st, long long size, int level,
8330 int layout, int chunksize, int raid_disks,
8331 int delta_disks, char *backup, char *dev,
8332 int verbose)
8333 {
8334 int ret_val = 1;
8335 struct geo_params geo;
8336
8337 dprintf("imsm: reshape_super called.\n");
8338
8339 memset(&geo, 0, sizeof(struct geo_params));
8340
8341 geo.dev_name = dev;
8342 geo.dev_id = st->devnum;
8343 geo.size = size;
8344 geo.level = level;
8345 geo.layout = layout;
8346 geo.chunksize = chunksize;
8347 geo.raid_disks = raid_disks;
8348 if (delta_disks != UnSet)
8349 geo.raid_disks += delta_disks;
8350
8351 dprintf("\tfor level : %i\n", geo.level);
8352 dprintf("\tfor raid_disks : %i\n", geo.raid_disks);
8353
8354 if (experimental() == 0)
8355 return ret_val;
8356
8357 if (st->container_dev == st->devnum) {
8358 /* On container level we can only increase number of devices. */
8359 dprintf("imsm: info: Container operation\n");
8360 int old_raid_disks = 0;
8361
8362 /* this warning will be removed when imsm checkpointing
8363 * will be implemented, and restoring from check-point
8364 * operation will be transparent for reboot process
8365 */
8366 if (warn_user_about_risk() == 0)
8367 return ret_val;
8368
8369 if (imsm_reshape_is_allowed_on_container(
8370 st, &geo, &old_raid_disks)) {
8371 struct imsm_update_reshape *u = NULL;
8372 int len;
8373
8374 len = imsm_create_metadata_update_for_reshape(
8375 st, &geo, old_raid_disks, &u);
8376
8377 if (len <= 0) {
8378 dprintf("imsm: Cannot prepare update\n");
8379 goto exit_imsm_reshape_super;
8380 }
8381
8382 ret_val = 0;
8383 /* update metadata locally */
8384 imsm_update_metadata_locally(st, u, len);
8385 /* and possibly remotely */
8386 if (st->update_tail)
8387 append_metadata_update(st, u, len);
8388 else
8389 free(u);
8390
8391 } else {
8392 fprintf(stderr, Name ": (imsm) Operation "
8393 "is not allowed on this container\n");
8394 }
8395 } else {
8396 /* On volume level we support following operations
8397 * - takeover: raid10 -> raid0; raid0 -> raid10
8398 * - chunk size migration
8399 * - migration: raid5 -> raid0; raid0 -> raid5
8400 */
8401 struct intel_super *super = st->sb;
8402 struct intel_dev *dev = super->devlist;
8403 int change, devnum;
8404 dprintf("imsm: info: Volume operation\n");
8405 /* find requested device */
8406 while (dev) {
8407 imsm_find_array_minor_by_subdev(dev->index, st->container_dev, &devnum);
8408 if (devnum == geo.dev_id)
8409 break;
8410 dev = dev->next;
8411 }
8412 if (dev == NULL) {
8413 fprintf(stderr, Name " Cannot find %s (%i) subarray\n",
8414 geo.dev_name, geo.dev_id);
8415 goto exit_imsm_reshape_super;
8416 }
8417 super->current_vol = dev->index;
8418 change = imsm_analyze_change(st, &geo);
8419 switch (change) {
8420 case CH_TAKEOVER:
8421 ret_val = imsm_takeover(st, &geo);
8422 break;
8423 case CH_MIGRATION: {
8424 struct imsm_update_reshape_migration *u = NULL;
8425 int len =
8426 imsm_create_metadata_update_for_migration(
8427 st, &geo, &u);
8428 if (len < 1) {
8429 dprintf("imsm: "
8430 "Cannot prepare update\n");
8431 break;
8432 }
8433 ret_val = 0;
8434 /* update metadata locally */
8435 imsm_update_metadata_locally(st, u, len);
8436 /* and possibly remotely */
8437 if (st->update_tail)
8438 append_metadata_update(st, u, len);
8439 else
8440 free(u);
8441 }
8442 break;
8443 default:
8444 ret_val = 1;
8445 }
8446 }
8447
8448 exit_imsm_reshape_super:
8449 dprintf("imsm: reshape_super Exit code = %i\n", ret_val);
8450 return ret_val;
8451 }
8452
8453 /*******************************************************************************
8454 * Function: wait_for_reshape_imsm
8455 * Description: Function writes new sync_max value and waits until
8456 * reshape process reach new position
8457 * Parameters:
8458 * sra : general array info
8459 * to_complete : new sync_max position
8460 * ndata : number of disks in new array's layout
8461 * Returns:
8462 * 0 : success,
8463 * 1 : there is no reshape in progress,
8464 * -1 : fail
8465 ******************************************************************************/
8466 int wait_for_reshape_imsm(struct mdinfo *sra, unsigned long long to_complete,
8467 int ndata)
8468 {
8469 int fd = sysfs_get_fd(sra, NULL, "reshape_position");
8470 unsigned long long completed;
8471
8472 struct timeval timeout;
8473
8474 if (fd < 0)
8475 return 1;
8476
8477 sysfs_fd_get_ll(fd, &completed);
8478
8479 if (to_complete == 0) {/* reshape till the end of array */
8480 sysfs_set_str(sra, NULL, "sync_max", "max");
8481 to_complete = MaxSector;
8482 } else {
8483 if (completed > to_complete)
8484 return -1;
8485 if (sysfs_set_num(sra, NULL, "sync_max",
8486 to_complete / ndata) != 0) {
8487 close(fd);
8488 return -1;
8489 }
8490 }
8491
8492 /* FIXME should not need a timeout at all */
8493 timeout.tv_sec = 30;
8494 timeout.tv_usec = 0;
8495 do {
8496 char action[20];
8497 fd_set rfds;
8498 FD_ZERO(&rfds);
8499 FD_SET(fd, &rfds);
8500 select(fd+1, NULL, NULL, &rfds, &timeout);
8501 if (sysfs_fd_get_ll(fd, &completed) < 0) {
8502 close(fd);
8503 return 1;
8504 }
8505 if (sysfs_get_str(sra, NULL, "sync_action",
8506 action, 20) > 0 &&
8507 strncmp(action, "reshape", 7) != 0)
8508 break;
8509 } while (completed < to_complete);
8510 close(fd);
8511 return 0;
8512
8513 }
8514
8515 /*******************************************************************************
8516 * Function: check_degradation_change
8517 * Description: Check that array hasn't become failed.
8518 * Parameters:
8519 * info : for sysfs access
8520 * sources : source disks descriptors
8521 * degraded: previous degradation level
8522 * Returns:
8523 * degradation level
8524 ******************************************************************************/
8525 int check_degradation_change(struct mdinfo *info,
8526 int *sources,
8527 int degraded)
8528 {
8529 unsigned long long new_degraded;
8530 sysfs_get_ll(info, NULL, "degraded", &new_degraded);
8531 if (new_degraded != (unsigned long long)degraded) {
8532 /* check each device to ensure it is still working */
8533 struct mdinfo *sd;
8534 new_degraded = 0;
8535 for (sd = info->devs ; sd ; sd = sd->next) {
8536 if (sd->disk.state & (1<<MD_DISK_FAULTY))
8537 continue;
8538 if (sd->disk.state & (1<<MD_DISK_SYNC)) {
8539 char sbuf[20];
8540 if (sysfs_get_str(info,
8541 sd, "state", sbuf, 20) < 0 ||
8542 strstr(sbuf, "faulty") ||
8543 strstr(sbuf, "in_sync") == NULL) {
8544 /* this device is dead */
8545 sd->disk.state = (1<<MD_DISK_FAULTY);
8546 if (sd->disk.raid_disk >= 0 &&
8547 sources[sd->disk.raid_disk] >= 0) {
8548 close(sources[
8549 sd->disk.raid_disk]);
8550 sources[sd->disk.raid_disk] =
8551 -1;
8552 }
8553 new_degraded++;
8554 }
8555 }
8556 }
8557 }
8558
8559 return new_degraded;
8560 }
8561
8562 /*******************************************************************************
8563 * Function: imsm_manage_reshape
8564 * Description: Function finds array under reshape and it manages reshape
8565 * process. It creates stripes backups (if required) and sets
* checkpoints.
8567 * Parameters:
* afd : Backup handle (native) - not used
8569 * sra : general array info
8570 * reshape : reshape parameters - not used
8571 * st : supertype structure
8572 * blocks : size of critical section [blocks]
8573 * fds : table of source device descriptor
* offsets : start of array (offset per devices)
8575 * dests : not used
8576 * destfd : table of destination device descriptor
8577 * destoffsets : table of destination offsets (per device)
8578 * Returns:
8579 * 1 : success, reshape is done
8580 * 0 : fail
8581 ******************************************************************************/
static int imsm_manage_reshape(
	int afd, struct mdinfo *sra, struct reshape *reshape,
	struct supertype *st, unsigned long backup_blocks,
	int *fds, unsigned long long *offsets,
	int dests, int *destfd, unsigned long long *destoffsets)
{
	int ret_val = 0;
	struct intel_super *super = st->sb;
	struct intel_dev *dv = NULL;
	struct imsm_dev *dev = NULL;
	struct imsm_map *map_src, *map_dest;
	int migr_vol_qan = 0;	/* number of volumes found mid-migration */
	int ndata, odata; /* [bytes] */
	int chunk; /* [bytes] */
	struct migr_record *migr_rec;
	char *buf = NULL;
	unsigned int buf_size; /* [bytes] */
	unsigned long long max_position; /* array size [bytes] */
	unsigned long long next_step; /* [blocks]/[bytes] */
	unsigned long long old_data_stripe_length;
	unsigned long long new_data_stripe_length;
	unsigned long long start_src; /* [bytes] */
	unsigned long long start; /* [bytes] */
	unsigned long long start_buf_shift; /* [bytes] */
	int degraded = 0;

	if (!fds || !offsets || !destfd || !destoffsets || !sra)
		goto abort;

	/* Find volume during the reshape */
	for (dv = super->devlist; dv; dv = dv->next) {
		if (dv->dev->vol.migr_type == MIGR_GEN_MIGR
		    && dv->dev->vol.migr_state == 1) {
			dev = dv->dev;
			migr_vol_qan++;
		}
	}
	/* Only one volume can migrate at the same time */
	if (migr_vol_qan != 1) {
		fprintf(stderr, Name " : %s", migr_vol_qan ?
			"Number of migrating volumes greater than 1\n" :
			"There is no volume during migrationg\n");
		goto abort;
	}

	/* map 1 = source geometry, map 0 = destination geometry */
	map_src = get_imsm_map(dev, 1);
	if (map_src == NULL)
		goto abort;
	map_dest = get_imsm_map(dev, 0);

	ndata = imsm_num_data_members(dev, 0);
	odata = imsm_num_data_members(dev, 1);

	chunk = map_src->blocks_per_strip * 512;
	old_data_stripe_length = odata * chunk;

	migr_rec = super->migr_rec;

	/* [bytes] */
	sra->new_chunk = __le16_to_cpu(map_dest->blocks_per_strip) * 512;
	sra->new_level = map_dest->raid_level;
	new_data_stripe_length = sra->new_chunk * ndata;

	/* initialize migration record for start condition */
	if (sra->reshape_progress == 0)
		init_migr_record_imsm(st, dev, sra);

	/* size for data */
	buf_size = __le32_to_cpu(migr_rec->blocks_per_unit) * 512;
	/* extend buffer size for parity disk */
	buf_size += __le32_to_cpu(migr_rec->dest_depth_per_unit) * 512;
	/* add space for stripe aligment */
	buf_size += old_data_stripe_length;
	if (posix_memalign((void **)&buf, 4096, buf_size)) {
		dprintf("imsm: Cannot allocate checpoint buffer\n");
		goto abort;
	}

	/* array size in bytes, assembled from the two 32-bit halves of the
	 * post-migration volume capacity stored in the migration record
	 */
	max_position =
		__le32_to_cpu(migr_rec->post_migr_vol_cap) +
		((unsigned long long)__le32_to_cpu(
			migr_rec->post_migr_vol_cap_hi) << 32);

	/* main loop: back up one migration unit (when it would be
	 * overwritten), then let the kernel reshape it, then checkpoint
	 */
	while (__le32_to_cpu(migr_rec->curr_migr_unit) <
	       __le32_to_cpu(migr_rec->num_migr_units)) {
		/* current reshape position [blocks] */
		unsigned long long current_position =
			__le32_to_cpu(migr_rec->blocks_per_unit)
			* __le32_to_cpu(migr_rec->curr_migr_unit);
		unsigned long long border;

		/* Check that array hasn't become failed.
		 */
		degraded = check_degradation_change(sra, fds, degraded);
		if (degraded > 1) {
			dprintf("imsm: Abort reshape due to degradation"
				" level (%i)\n", degraded);
			goto abort;
		}

		next_step = __le32_to_cpu(migr_rec->blocks_per_unit);

		if ((current_position + next_step) > max_position)
			next_step = max_position - current_position;

		start = (map_src->pba_of_lba0 + dev->reserved_blocks +
			 current_position) * 512;

		/* allign reading start to old geometry */
		start_buf_shift = start % old_data_stripe_length;
		start_src = start - start_buf_shift;

		/* distance between the source and destination positions;
		 * a backup is only needed when they are close enough that
		 * the kernel would overwrite not-yet-copied source data
		 */
		border = (start_src / odata) - (start / ndata);
		border /= 512;
		if (border <= __le32_to_cpu(migr_rec->dest_depth_per_unit)) {
			/* save critical stripes to buf
			 * start     - start address of current unit
			 *             to backup [bytes]
			 * start_src - start address of current unit
			 *             to backup alligned to source array
			 *             [bytes]
			 */
			unsigned long long next_step_filler = 0;
			unsigned long long copy_length = next_step * 512;

			/* allign copy area length to stripe in old geometry */
			next_step_filler = ((copy_length + start_buf_shift)
					    % old_data_stripe_length);
			if (next_step_filler)
				next_step_filler = (old_data_stripe_length
						    - next_step_filler);
			dprintf("save_stripes() parameters: start = %llu,"
				"\tstart_src = %llu,\tnext_step*512 = %llu,"
				"\tstart_in_buf_shift = %llu,"
				"\tnext_step_filler = %llu\n",
				start, start_src, copy_length,
				start_buf_shift, next_step_filler);

			if (save_stripes(fds, offsets, map_src->num_members,
					 chunk, sra->array.level,
					 sra->array.layout, 0, NULL, start_src,
					 copy_length +
					 next_step_filler + start_buf_shift,
					 buf)) {
				dprintf("imsm: Cannot save stripes"
					" to buffer\n");
				goto abort;
			}
			/* Convert data to destination format and store it
			 * in backup general migration area
			 */
			if (save_backup_imsm(st, dev, sra,
					     buf + start_buf_shift,
					     ndata, copy_length)) {
				dprintf("imsm: Cannot save stripes to "
					"target devices\n");
				goto abort;
			}
			if (save_checkpoint_imsm(st, sra,
						 UNIT_SRC_IN_CP_AREA)) {
				dprintf("imsm: Cannot write checkpoint to "
					"migration record (UNIT_SRC_IN_CP_AREA)\n");
				goto abort;
			}
			/* decrease backup_blocks */
			if (backup_blocks > (unsigned long)next_step)
				backup_blocks -= next_step;
			else
				backup_blocks = 0;
		}
		/* When data backed up, checkpoint stored,
		 * kick the kernel to reshape unit of data
		 */
		next_step = next_step + sra->reshape_progress;
		sysfs_set_num(sra, NULL, "suspend_lo", sra->reshape_progress);
		sysfs_set_num(sra, NULL, "suspend_hi", next_step);

		/* wait until reshape finish */
		if (wait_for_reshape_imsm(sra, next_step, ndata) < 0) {
			dprintf("wait_for_reshape_imsm returned error!\n");
			goto abort;
		}

		sra->reshape_progress = next_step;

		/* unit reshaped: record UNIT_SRC_NORMAL so the backup is no
		 * longer needed for this unit on restart
		 */
		if (save_checkpoint_imsm(st, sra, UNIT_SRC_NORMAL)) {
			dprintf("imsm: Cannot write checkpoint to "
				"migration record (UNIT_SRC_NORMAL)\n");
			goto abort;
		}

	}

	/* return '1' if done */
	ret_val = 1;
abort:
	free(buf);
	abort_reshape(sra);

	return ret_val;
}
8783 #endif /* MDASSEMBLE */
8784
/* Method table that registers the IMSM (Intel Matrix Storage Manager)
 * metadata handler with mdadm's generic superswitch framework.  mdadm
 * and mdmon dispatch every metadata operation for "imsm" containers
 * through these callbacks.  Entries guarded by #ifndef MDASSEMBLE are
 * compiled out of the size-reduced assembly-only build.
 */
struct superswitch super_imsm = {
#ifndef MDASSEMBLE
	/* Examination / reporting callbacks (mdadm -E, -D, --detail-platform) */
	.examine_super = examine_super_imsm,
	.brief_examine_super = brief_examine_super_imsm,
	.brief_examine_subarrays = brief_examine_subarrays_imsm,
	.export_examine_super = export_examine_super_imsm,
	.detail_super = detail_super_imsm,
	.brief_detail_super = brief_detail_super_imsm,
	/* Array creation / membership management */
	.write_init_super = write_init_super_imsm,
	.validate_geometry = validate_geometry_imsm,
	.add_to_super = add_to_super_imsm,
	.remove_from_super = remove_from_super_imsm,
	.detail_platform = detail_platform_imsm,
	.kill_subarray = kill_subarray_imsm,
	.update_subarray = update_subarray_imsm,
	.load_container = load_container_imsm,
	.default_geometry = default_geometry_imsm,
	.get_disk_controller_domain = imsm_get_disk_controller_domain,
	/* Online reshape (grow) support */
	.reshape_super = imsm_reshape_super,
	.manage_reshape = imsm_manage_reshape,
#endif
	/* Assembly-time metadata queries (also built under MDASSEMBLE) */
	.match_home = match_home_imsm,
	.uuid_from_super= uuid_from_super_imsm,
	.getinfo_super = getinfo_super_imsm,
	.getinfo_super_disks = getinfo_super_disks_imsm,
	.update_super = update_super_imsm,

	/* Capacity calculations for data area / spare eligibility */
	.avail_size = avail_size_imsm,
	.min_acceptable_spare_size = min_acceptable_spare_size_imsm,

	.compare_super = compare_super_imsm,

	/* Superblock load/store lifecycle */
	.load_super = load_super_imsm,
	.init_super = init_super_imsm,
	.store_super = store_super_imsm,
	.free_super = free_super_imsm,
	.match_metadata_desc = match_metadata_desc_imsm,
	.container_content = container_content_imsm,

	/* Restore reshape backup data after an interrupted migration */
	.recover_backup = recover_backup_imsm,

	/* IMSM is externally-managed metadata (mdmon handles updates) */
	.external = 1,
	.name = "imsm",

#ifndef MDASSEMBLE
/* for mdmon */
	/* Runtime monitoring callbacks driven by the mdmon daemon */
	.open_new = imsm_open_new,
	.set_array_state= imsm_set_array_state,
	.set_disk = imsm_set_disk,
	.sync_metadata = imsm_sync_metadata,
	.activate_spare = imsm_activate_spare,
	.process_update = imsm_process_update,
	.prepare_update = imsm_prepare_update,
#endif /* MDASSEMBLE */
};