1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2008 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define HAVE_STDINT_H 1
21 #include "mdadm.h"
22 #include "mdmon.h"
23 #include "sha1.h"
24 #include "platform-intel.h"
25 #include <values.h>
26 #include <scsi/sg.h>
27 #include <ctype.h>
28 #include <dirent.h>
29
30 /* MPB == Metadata Parameter Block */
31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33 #define MPB_VERSION_RAID0 "1.0.00"
34 #define MPB_VERSION_RAID1 "1.1.00"
35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
37 #define MPB_VERSION_RAID5 "1.2.02"
38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39 #define MPB_VERSION_CNG "1.2.06"
40 #define MPB_VERSION_ATTRIBS "1.3.00"
41 #define MAX_SIGNATURE_LENGTH 32
42 #define MAX_RAID_SERIAL_LEN 16
43
44 /* supports RAID0 */
45 #define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
46 /* supports RAID1 */
47 #define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
48 /* supports RAID10 */
49 #define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50 /* supports RAID1E */
51 #define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
52 /* supports RAID5 */
53 #define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
54 /* supports RAID CNG */
55 #define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
56 /* supports expanded stripe sizes of 256K, 512K and 1MB */
57 #define MPB_ATTRIB_EXP_STRIPE_SIZE __cpu_to_le32(0x00000040)
58
  59 /* The OROM supports RST caching of volumes */
60 #define MPB_ATTRIB_NVM __cpu_to_le32(0x02000000)
61 /* The OROM supports creating disks greater than 2TB */
62 #define MPB_ATTRIB_2TB_DISK __cpu_to_le32(0x04000000)
63 /* The OROM supports Bad Block Management */
64 #define MPB_ATTRIB_BBM __cpu_to_le32(0x08000000)
65
  66 /* The OROM supports NVM caching of volumes */
67 #define MPB_ATTRIB_NEVER_USE2 __cpu_to_le32(0x10000000)
68 /* The OROM supports creating volumes greater than 2TB */
69 #define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
  70 /* originally for PMP; now it's wasted. Never use this bit! */
71 #define MPB_ATTRIB_NEVER_USE __cpu_to_le32(0x40000000)
72 /* Verify MPB contents against checksum after reading MPB */
73 #define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
74
75 /* Define all supported attributes that have to be accepted by mdadm
76 */
77 #define MPB_ATTRIB_SUPPORTED (MPB_ATTRIB_CHECKSUM_VERIFY | \
78 MPB_ATTRIB_2TB | \
79 MPB_ATTRIB_2TB_DISK | \
80 MPB_ATTRIB_RAID0 | \
81 MPB_ATTRIB_RAID1 | \
82 MPB_ATTRIB_RAID10 | \
83 MPB_ATTRIB_RAID5 | \
84 MPB_ATTRIB_EXP_STRIPE_SIZE | \
85 MPB_ATTRIB_BBM)
86
87 /* Define attributes that are unused but not harmful */
88 #define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE)
89
90 #define MPB_SECTOR_CNT 2210
91 #define IMSM_RESERVED_SECTORS 4096
92 #define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
93 #define SECT_PER_MB_SHIFT 11
94 #define MAX_SECTOR_SIZE 4096
95
96 /* Disk configuration info. */
97 #define IMSM_MAX_DEVICES 255
98 struct imsm_disk {
99 __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
100 __u32 total_blocks_lo; /* 0xE8 - 0xEB total blocks lo */
101 __u32 scsi_id; /* 0xEC - 0xEF scsi ID */
102 #define SPARE_DISK __cpu_to_le32(0x01) /* Spare */
103 #define CONFIGURED_DISK __cpu_to_le32(0x02) /* Member of some RaidDev */
104 #define FAILED_DISK __cpu_to_le32(0x04) /* Permanent failure */
105 __u32 status; /* 0xF0 - 0xF3 */
 106 	__u32 owner_cfg_num; /* 0xF4 - 0xF7 which config 0,1,2... owns this disk */
 107 	__u32 total_blocks_hi; /* 0xF8 - 0xFB total blocks hi */
 108 #define	IMSM_DISK_FILLERS	3
 109 	__u32 filler[IMSM_DISK_FILLERS]; /* 0xFC - 0x107 MPB_DISK_FILLERS for future expansion */
110 };
111
 112 /* map selector for map management
113 */
114 #define MAP_0 0
115 #define MAP_1 1
116 #define MAP_X -1
117
 118 /* RAID map configuration info. */
119 struct imsm_map {
120 __u32 pba_of_lba0_lo; /* start address of partition */
121 __u32 blocks_per_member_lo;/* blocks per member */
122 __u32 num_data_stripes_lo; /* number of data stripes */
123 __u16 blocks_per_strip;
124 __u8 map_state; /* Normal, Uninitialized, Degraded, Failed */
125 #define IMSM_T_STATE_NORMAL 0
126 #define IMSM_T_STATE_UNINITIALIZED 1
127 #define IMSM_T_STATE_DEGRADED 2
128 #define IMSM_T_STATE_FAILED 3
129 __u8 raid_level;
130 #define IMSM_T_RAID0 0
131 #define IMSM_T_RAID1 1
132 #define IMSM_T_RAID5 5 /* since metadata version 1.2.02 ? */
133 __u8 num_members; /* number of member disks */
134 __u8 num_domains; /* number of parity domains */
135 __u8 failed_disk_num; /* valid only when state is degraded */
136 __u8 ddf;
137 __u32 pba_of_lba0_hi;
138 __u32 blocks_per_member_hi;
139 __u32 num_data_stripes_hi;
140 __u32 filler[4]; /* expansion area */
141 #define IMSM_ORD_REBUILD (1 << 24)
142 __u32 disk_ord_tbl[1]; /* disk_ord_tbl[num_members],
143 * top byte contains some flags
144 */
145 } __attribute__ ((packed));
146
147 struct imsm_vol {
148 __u32 curr_migr_unit;
149 __u32 checkpoint_id; /* id to access curr_migr_unit */
150 __u8 migr_state; /* Normal or Migrating */
151 #define MIGR_INIT 0
152 #define MIGR_REBUILD 1
 153 #define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
154 #define MIGR_GEN_MIGR 3
155 #define MIGR_STATE_CHANGE 4
156 #define MIGR_REPAIR 5
157 __u8 migr_type; /* Initializing, Rebuilding, ... */
158 __u8 dirty;
159 __u8 fs_state; /* fast-sync state for CnG (0xff == disabled) */
160 __u16 verify_errors; /* number of mismatches */
161 __u16 bad_blocks; /* number of bad blocks during verify */
162 __u32 filler[4];
163 struct imsm_map map[1];
164 /* here comes another one if migr_state */
165 } __attribute__ ((packed));
166
167 struct imsm_dev {
168 __u8 volume[MAX_RAID_SERIAL_LEN];
169 __u32 size_low;
170 __u32 size_high;
171 #define DEV_BOOTABLE __cpu_to_le32(0x01)
172 #define DEV_BOOT_DEVICE __cpu_to_le32(0x02)
173 #define DEV_READ_COALESCING __cpu_to_le32(0x04)
174 #define DEV_WRITE_COALESCING __cpu_to_le32(0x08)
175 #define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
176 #define DEV_HIDDEN_AT_BOOT __cpu_to_le32(0x20)
177 #define DEV_CURRENTLY_HIDDEN __cpu_to_le32(0x40)
178 #define DEV_VERIFY_AND_FIX __cpu_to_le32(0x80)
179 #define DEV_MAP_STATE_UNINIT __cpu_to_le32(0x100)
180 #define DEV_NO_AUTO_RECOVERY __cpu_to_le32(0x200)
181 #define DEV_CLONE_N_GO __cpu_to_le32(0x400)
182 #define DEV_CLONE_MAN_SYNC __cpu_to_le32(0x800)
183 #define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
184 __u32 status; /* Persistent RaidDev status */
185 __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
186 __u8 migr_priority;
187 __u8 num_sub_vols;
188 __u8 tid;
189 __u8 cng_master_disk;
190 __u16 cache_policy;
191 __u8 cng_state;
192 __u8 cng_sub_state;
193 #define IMSM_DEV_FILLERS 10
194 __u32 filler[IMSM_DEV_FILLERS];
195 struct imsm_vol vol;
196 } __attribute__ ((packed));
197
198 struct imsm_super {
199 __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
200 __u32 check_sum; /* 0x20 - 0x23 MPB Checksum */
201 __u32 mpb_size; /* 0x24 - 0x27 Size of MPB */
202 __u32 family_num; /* 0x28 - 0x2B Checksum from first time this config was written */
203 __u32 generation_num; /* 0x2C - 0x2F Incremented each time this array's MPB is written */
204 __u32 error_log_size; /* 0x30 - 0x33 in bytes */
205 __u32 attributes; /* 0x34 - 0x37 */
206 __u8 num_disks; /* 0x38 Number of configured disks */
207 __u8 num_raid_devs; /* 0x39 Number of configured volumes */
208 __u8 error_log_pos; /* 0x3A */
209 __u8 fill[1]; /* 0x3B */
 210 	__u32 cache_size; /* 0x3C - 0x3F in MB */
211 __u32 orig_family_num; /* 0x40 - 0x43 original family num */
212 __u32 pwr_cycle_count; /* 0x44 - 0x47 simulated power cycle count for array */
213 __u32 bbm_log_size; /* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
214 #define IMSM_FILLERS 35
215 __u32 filler[IMSM_FILLERS]; /* 0x4C - 0xD7 RAID_MPB_FILLERS */
216 struct imsm_disk disk[1]; /* 0xD8 diskTbl[numDisks] */
217 /* here comes imsm_dev[num_raid_devs] */
218 /* here comes BBM logs */
219 } __attribute__ ((packed));
220
221 #define BBM_LOG_MAX_ENTRIES 254
222 #define BBM_LOG_MAX_LBA_ENTRY_VAL 256 /* Represents 256 LBAs */
223 #define BBM_LOG_SIGNATURE 0xabadb10c
224
225 struct bbm_log_block_addr {
226 __u16 w1;
227 __u32 dw1;
228 } __attribute__ ((__packed__));
229
230 struct bbm_log_entry {
231 __u8 marked_count; /* Number of blocks marked - 1 */
232 __u8 disk_ordinal; /* Disk entry within the imsm_super */
233 struct bbm_log_block_addr defective_block_start;
234 } __attribute__ ((__packed__));
235
236 struct bbm_log {
237 __u32 signature; /* 0xABADB10C */
238 __u32 entry_count;
239 struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
240 } __attribute__ ((__packed__));
241
242 #ifndef MDASSEMBLE
243 static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
244 #endif
245
246 #define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
247
248 #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
249
250 #define MIGR_REC_BUF_SECTORS 1 /* size of migr_record i/o buffer in sectors */
251 #define MIGR_REC_SECTOR_POSITION 1 /* migr_record position offset on disk,
 252 					 * MIGR_REC_BUF_SECTORS <= MIGR_REC_SECTOR_POSITION
253 */
254
255 #define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
256 * be recovered using srcMap */
257 #define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
258 * already been migrated and must
259 * be recovered from checkpoint area */
260 struct migr_record {
261 __u32 rec_status; /* Status used to determine how to restart
262 * migration in case it aborts
263 * in some fashion */
264 __u32 curr_migr_unit; /* 0..numMigrUnits-1 */
265 __u32 family_num; /* Family number of MPB
266 * containing the RaidDev
267 * that is migrating */
268 __u32 ascending_migr; /* True if migrating in increasing
269 * order of lbas */
270 __u32 blocks_per_unit; /* Num disk blocks per unit of operation */
271 __u32 dest_depth_per_unit; /* Num member blocks each destMap
272 * member disk
273 * advances per unit-of-operation */
274 __u32 ckpt_area_pba; /* Pba of first block of ckpt copy area */
275 __u32 dest_1st_member_lba; /* First member lba on first
276 * stripe of destination */
277 __u32 num_migr_units; /* Total num migration units-of-op */
278 __u32 post_migr_vol_cap; /* Size of volume after
279 * migration completes */
280 __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
281 __u32 ckpt_read_disk_num; /* Which member disk in destSubMap[0] the
282 * migration ckpt record was read from
283 * (for recovered migrations) */
284 } __attribute__ ((__packed__));
285
286 struct md_list {
287 /* usage marker:
288 * 1: load metadata
289 * 2: metadata does not match
290 * 4: already checked
291 */
292 int used;
293 char *devname;
294 int found;
295 int container;
296 dev_t st_rdev;
297 struct md_list *next;
298 };
299
300 #define pr_vrb(fmt, arg...) (void) (verbose && pr_err(fmt, ##arg))
301
302 static __u8 migr_type(struct imsm_dev *dev)
303 {
304 if (dev->vol.migr_type == MIGR_VERIFY &&
305 dev->status & DEV_VERIFY_AND_FIX)
306 return MIGR_REPAIR;
307 else
308 return dev->vol.migr_type;
309 }
310
311 static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
312 {
 313 	/* for compatibility with older OROMs convert MIGR_REPAIR into
314 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
315 */
316 if (migr_type == MIGR_REPAIR) {
317 dev->vol.migr_type = MIGR_VERIFY;
318 dev->status |= DEV_VERIFY_AND_FIX;
319 } else {
320 dev->vol.migr_type = migr_type;
321 dev->status &= ~DEV_VERIFY_AND_FIX;
322 }
323 }
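
/* Illustrative sketch (not part of the upstream file): the two helpers above
 * round-trip MIGR_REPAIR through the legacy on-disk encoding, i.e. it is
 * stored as MIGR_VERIFY plus the DEV_VERIFY_AND_FIX status bit and decoded
 * back transparently. A hypothetical self-check:
 */
static inline int example_migr_repair_roundtrip(struct imsm_dev *dev)
{
	set_migr_type(dev, MIGR_REPAIR);
	return dev->vol.migr_type == MIGR_VERIFY &&	/* legacy encoding */
	       (dev->status & DEV_VERIFY_AND_FIX) &&
	       migr_type(dev) == MIGR_REPAIR;		/* readers see repair */
}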
324
325 static unsigned int sector_count(__u32 bytes, unsigned int sector_size)
326 {
327 return ROUND_UP(bytes, sector_size) / sector_size;
328 }
329
330 static unsigned int mpb_sectors(struct imsm_super *mpb,
331 unsigned int sector_size)
332 {
333 return sector_count(__le32_to_cpu(mpb->mpb_size), sector_size);
334 }
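
/* Worked example (sketch, values chosen for illustration): sector_count()
 * rounds the byte length up to whole sectors, so a 2500-byte MPB occupies
 * five 512-byte sectors but only a single 4096-byte sector.
 */
static inline void example_sector_count(void)
{
	unsigned int small = sector_count(2500, 512);	/* == 5 */
	unsigned int large = sector_count(2500, 4096);	/* == 1 */

	(void) small;
	(void) large;
}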
335
336 struct intel_dev {
337 struct imsm_dev *dev;
338 struct intel_dev *next;
339 unsigned index;
340 };
341
342 struct intel_hba {
343 enum sys_dev_type type;
344 char *path;
345 char *pci_id;
346 struct intel_hba *next;
347 };
348
349 enum action {
350 DISK_REMOVE = 1,
351 DISK_ADD
352 };
353 /* internal representation of IMSM metadata */
354 struct intel_super {
355 union {
356 void *buf; /* O_DIRECT buffer for reading/writing metadata */
357 struct imsm_super *anchor; /* immovable parameters */
358 };
359 union {
360 void *migr_rec_buf; /* buffer for I/O operations */
361 struct migr_record *migr_rec; /* migration record */
362 };
 363 	int clean_migration_record_by_mdmon; /* set when a reshape switches to
 364 		the next array, indicating that mdmon is allowed to clear the
 365 		migration record */
366 size_t len; /* size of the 'buf' allocation */
367 size_t extra_space; /* extra space in 'buf' that is not used yet */
368 void *next_buf; /* for realloc'ing buf from the manager */
369 size_t next_len;
370 int updates_pending; /* count of pending updates for mdmon */
371 int current_vol; /* index of raid device undergoing creation */
372 unsigned long long create_offset; /* common start for 'current_vol' */
373 __u32 random; /* random data for seeding new family numbers */
374 struct intel_dev *devlist;
375 unsigned int sector_size; /* sector size of used member drives */
376 struct dl {
377 struct dl *next;
378 int index;
379 __u8 serial[MAX_RAID_SERIAL_LEN];
380 int major, minor;
381 char *devname;
382 struct imsm_disk disk;
383 int fd;
384 int extent_cnt;
385 struct extent *e; /* for determining freespace @ create */
386 int raiddisk; /* slot to fill in autolayout */
387 enum action action;
388 } *disks, *current_disk;
389 struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
390 active */
391 struct dl *missing; /* disks removed while we weren't looking */
392 struct bbm_log *bbm_log;
393 struct intel_hba *hba; /* device path of the raid controller for this metadata */
394 const struct imsm_orom *orom; /* platform firmware support */
395 struct intel_super *next; /* (temp) list for disambiguating family_num */
396 struct md_bb bb; /* memory for get_bad_blocks call */
397 };
398
399 struct intel_disk {
400 struct imsm_disk disk;
401 #define IMSM_UNKNOWN_OWNER (-1)
402 int owner;
403 struct intel_disk *next;
404 };
405
406 struct extent {
407 unsigned long long start, size;
408 };
409
410 /* definitions of reshape process types */
411 enum imsm_reshape_type {
412 CH_TAKEOVER,
413 CH_MIGRATION,
414 CH_ARRAY_SIZE,
415 };
416
417 /* definition of messages passed to imsm_process_update */
418 enum imsm_update_type {
419 update_activate_spare,
420 update_create_array,
421 update_kill_array,
422 update_rename_array,
423 update_add_remove_disk,
424 update_reshape_container_disks,
425 update_reshape_migration,
426 update_takeover,
427 update_general_migration_checkpoint,
428 update_size_change,
429 update_prealloc_badblocks_mem,
430 };
431
432 struct imsm_update_activate_spare {
433 enum imsm_update_type type;
434 struct dl *dl;
435 int slot;
436 int array;
437 struct imsm_update_activate_spare *next;
438 };
439
440 struct geo_params {
441 char devnm[32];
442 char *dev_name;
443 unsigned long long size;
444 int level;
445 int layout;
446 int chunksize;
447 int raid_disks;
448 };
449
450 enum takeover_direction {
451 R10_TO_R0,
452 R0_TO_R10
453 };
454 struct imsm_update_takeover {
455 enum imsm_update_type type;
456 int subarray;
457 enum takeover_direction direction;
458 };
459
460 struct imsm_update_reshape {
461 enum imsm_update_type type;
462 int old_raid_disks;
463 int new_raid_disks;
464
 465 	int new_disks[1]; /* makedev numbers of the (new_raid_disks - old_raid_disks) added disks */
466 };
467
468 struct imsm_update_reshape_migration {
469 enum imsm_update_type type;
470 int old_raid_disks;
471 int new_raid_disks;
472 /* fields for array migration changes
473 */
474 int subdev;
475 int new_level;
476 int new_layout;
477 int new_chunksize;
478
 479 	int new_disks[1]; /* makedev numbers of the (new_raid_disks - old_raid_disks) added disks */
480 };
481
482 struct imsm_update_size_change {
483 enum imsm_update_type type;
484 int subdev;
485 long long new_size;
486 };
487
488 struct imsm_update_general_migration_checkpoint {
489 enum imsm_update_type type;
490 __u32 curr_migr_unit;
491 };
492
493 struct disk_info {
494 __u8 serial[MAX_RAID_SERIAL_LEN];
495 };
496
497 struct imsm_update_create_array {
498 enum imsm_update_type type;
499 int dev_idx;
500 struct imsm_dev dev;
501 };
502
503 struct imsm_update_kill_array {
504 enum imsm_update_type type;
505 int dev_idx;
506 };
507
508 struct imsm_update_rename_array {
509 enum imsm_update_type type;
510 __u8 name[MAX_RAID_SERIAL_LEN];
511 int dev_idx;
512 };
513
514 struct imsm_update_add_remove_disk {
515 enum imsm_update_type type;
516 };
517
518 struct imsm_update_prealloc_bb_mem {
519 enum imsm_update_type type;
520 };
521
522 static const char *_sys_dev_type[] = {
523 [SYS_DEV_UNKNOWN] = "Unknown",
524 [SYS_DEV_SAS] = "SAS",
525 [SYS_DEV_SATA] = "SATA",
526 [SYS_DEV_NVME] = "NVMe",
527 [SYS_DEV_VMD] = "VMD"
528 };
529
530 const char *get_sys_dev_type(enum sys_dev_type type)
531 {
532 if (type >= SYS_DEV_MAX)
533 type = SYS_DEV_UNKNOWN;
534
535 return _sys_dev_type[type];
536 }
537
538 static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
539 {
540 struct intel_hba *result = xmalloc(sizeof(*result));
541
542 result->type = device->type;
543 result->path = xstrdup(device->path);
544 result->next = NULL;
545 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
546 result->pci_id++;
547
548 return result;
549 }
550
551 static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
552 {
553 struct intel_hba *result;
554
555 for (result = hba; result; result = result->next) {
556 if (result->type == device->type && strcmp(result->path, device->path) == 0)
557 break;
558 }
559 return result;
560 }
561
562 static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
563 {
564 struct intel_hba *hba;
565
 566 	/* check if this disk's HBA is already attached to super */
567 hba = find_intel_hba(super->hba, device);
568 if (hba != NULL)
569 return 1;
 570 	/* no HBA attached yet - attach this one */
571 if (super->hba == NULL) {
572 super->hba = alloc_intel_hba(device);
573 return 1;
574 }
575
576 hba = super->hba;
 577 	/* Intel metadata requires all disks to be attached to the same type
 578 	 * of HBA; mixing HBA types is not supported
 579 	 */
580 if (device->type != hba->type)
581 return 2;
582
583 /* Multiple same type HBAs can be used if they share the same OROM */
584 const struct imsm_orom *device_orom = get_orom_by_device_id(device->dev_id);
585
586 if (device_orom != super->orom)
587 return 2;
588
589 while (hba->next)
590 hba = hba->next;
591
592 hba->next = alloc_intel_hba(device);
593 return 1;
594 }
595
596 static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
597 {
598 struct sys_dev *list, *elem;
599 char *disk_path;
600
601 if ((list = find_intel_devices()) == NULL)
602 return 0;
603
604 if (fd < 0)
605 disk_path = (char *) devname;
606 else
607 disk_path = diskfd_to_devpath(fd);
608
609 if (!disk_path)
610 return 0;
611
612 for (elem = list; elem; elem = elem->next)
613 if (path_attached_to_hba(disk_path, elem->path))
614 return elem;
615
616 if (disk_path != devname)
617 free(disk_path);
618
619 return NULL;
620 }
621
622 static int find_intel_hba_capability(int fd, struct intel_super *super,
623 char *devname);
624
625 static struct supertype *match_metadata_desc_imsm(char *arg)
626 {
627 struct supertype *st;
628
629 if (strcmp(arg, "imsm") != 0 &&
630 strcmp(arg, "default") != 0
631 )
632 return NULL;
633
634 st = xcalloc(1, sizeof(*st));
635 st->ss = &super_imsm;
636 st->max_devs = IMSM_MAX_DEVICES;
637 st->minor_version = 0;
638 st->sb = NULL;
639 return st;
640 }
641
642 #ifndef MDASSEMBLE
643 static __u8 *get_imsm_version(struct imsm_super *mpb)
644 {
645 return &mpb->sig[MPB_SIG_LEN];
646 }
647 #endif
648
649 /* retrieve a disk directly from the anchor when the anchor is known to be
650 * up-to-date, currently only at load time
651 */
652 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
653 {
654 if (index >= mpb->num_disks)
655 return NULL;
656 return &mpb->disk[index];
657 }
658
 659 /* retrieve the disk description based on an index of the disk
660 * in the sub-array
661 */
662 static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
663 {
664 struct dl *d;
665
666 for (d = super->disks; d; d = d->next)
667 if (d->index == index)
668 return d;
669
670 return NULL;
671 }
672 /* retrieve a disk from the parsed metadata */
673 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
674 {
675 struct dl *dl;
676
677 dl = get_imsm_dl_disk(super, index);
678 if (dl)
679 return &dl->disk;
680
681 return NULL;
682 }
683
684 /* generate a checksum directly from the anchor when the anchor is known to be
685 * up-to-date, currently only at load or write_super after coalescing
686 */
687 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
688 {
689 __u32 end = mpb->mpb_size / sizeof(end);
690 __u32 *p = (__u32 *) mpb;
691 __u32 sum = 0;
692
693 while (end--) {
694 sum += __le32_to_cpu(*p);
695 p++;
696 }
697
698 return sum - __le32_to_cpu(mpb->check_sum);
699 }
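
/* Usage sketch: an anchor is self-consistent when the recomputed checksum
 * matches the stored little-endian check_sum field, exactly as
 * examine_super_imsm() verifies further below. Hypothetical helper:
 */
static inline int example_mpb_checksum_ok(struct imsm_super *mpb)
{
	return __gen_imsm_checksum(mpb) == __le32_to_cpu(mpb->check_sum);
}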
700
701 static size_t sizeof_imsm_map(struct imsm_map *map)
702 {
703 return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
704 }
705
706 struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
707 {
708 /* A device can have 2 maps if it is in the middle of a migration.
709 * If second_map is:
710 * MAP_0 - we return the first map
711 * MAP_1 - we return the second map if it exists, else NULL
712 * MAP_X - we return the second map if it exists, else the first
713 */
714 struct imsm_map *map = &dev->vol.map[0];
715 struct imsm_map *map2 = NULL;
716
717 if (dev->vol.migr_state)
718 map2 = (void *)map + sizeof_imsm_map(map);
719
720 switch (second_map) {
721 case MAP_0:
722 break;
723 case MAP_1:
724 map = map2;
725 break;
726 case MAP_X:
727 if (map2)
728 map = map2;
729 break;
730 default:
731 map = NULL;
732 }
733 return map;
734
735 }
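
/* Sketch: while a migration is in progress a device carries a second map
 * laid out directly after the first, and both can be reached through the
 * selectors above. The helper below is a hypothetical illustration only.
 */
static inline void example_walk_maps(struct imsm_dev *dev)
{
	struct imsm_map *cur = get_imsm_map(dev, MAP_0);
	struct imsm_map *prev = get_imsm_map(dev, MAP_1); /* NULL unless migrating */

	(void) cur;
	if (prev) {
		/* e.g. compare member counts across the transition */
	}
}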
736
 737 /* return the size of the device; if the device is migrating the second
 738  * map is included, otherwise migr_state sizes it as if map[0] were duplicated
 739  */
740 static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
741 {
742 size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
743 sizeof_imsm_map(get_imsm_map(dev, MAP_0));
744
745 /* migrating means an additional map */
746 if (dev->vol.migr_state)
747 size += sizeof_imsm_map(get_imsm_map(dev, MAP_1));
748 else if (migr_state)
749 size += sizeof_imsm_map(get_imsm_map(dev, MAP_0));
750
751 return size;
752 }
753
754 #ifndef MDASSEMBLE
755 /* retrieve disk serial number list from a metadata update */
756 static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
757 {
758 void *u = update;
759 struct disk_info *inf;
760
761 inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
762 sizeof_imsm_dev(&update->dev, 0);
763
764 return inf;
765 }
766 #endif
767
768 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
769 {
770 int offset;
771 int i;
772 void *_mpb = mpb;
773
774 if (index >= mpb->num_raid_devs)
775 return NULL;
776
777 /* devices start after all disks */
778 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
779
780 for (i = 0; i <= index; i++)
781 if (i == index)
782 return _mpb + offset;
783 else
784 offset += sizeof_imsm_dev(_mpb + offset, 0);
785
786 return NULL;
787 }
788
789 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
790 {
791 struct intel_dev *dv;
792
793 if (index >= super->anchor->num_raid_devs)
794 return NULL;
795 for (dv = super->devlist; dv; dv = dv->next)
796 if (dv->index == index)
797 return dv->dev;
798 return NULL;
799 }
800
801 static inline unsigned long long __le48_to_cpu(const struct bbm_log_block_addr
802 *addr)
803 {
804 return ((((__u64)__le32_to_cpu(addr->dw1)) << 16) |
805 __le16_to_cpu(addr->w1));
806 }
807
808 static inline struct bbm_log_block_addr __cpu_to_le48(unsigned long long sec)
809 {
810 struct bbm_log_block_addr addr;
811
812 addr.w1 = __cpu_to_le16((__u16)(sec & 0xffff));
813 addr.dw1 = __cpu_to_le32((__u32)(sec >> 16) & 0xffffffff);
814 return addr;
815 }
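
/* Sketch: a 48-bit block address is split into the low 16 bits (w1) and the
 * next 32 bits (dw1), so packing and unpacking round-trips any sector below
 * 2^48. A hypothetical check of that invariant:
 */
static inline int example_le48_roundtrip(unsigned long long sector)
{
	struct bbm_log_block_addr addr = __cpu_to_le48(sector);

	return __le48_to_cpu(&addr) == (sector & 0xffffffffffffULL);
}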
816
817 #ifndef MDASSEMBLE
818 /* get size of the bbm log */
819 static __u32 get_imsm_bbm_log_size(struct bbm_log *log)
820 {
821 if (!log || log->entry_count == 0)
822 return 0;
823
824 return sizeof(log->signature) +
825 sizeof(log->entry_count) +
826 log->entry_count * sizeof(struct bbm_log_entry);
827 }
828
 829 /* check if a bad block range is already fully covered by a bbm log entry */
830 static int is_stored_in_bbm(struct bbm_log *log, const __u8 idx, const unsigned
831 long long sector, const int length, __u32 *pos)
832 {
833 __u32 i;
834
835 for (i = *pos; i < log->entry_count; i++) {
836 struct bbm_log_entry *entry = &log->marked_block_entries[i];
837 unsigned long long bb_start;
838 unsigned long long bb_end;
839
840 bb_start = __le48_to_cpu(&entry->defective_block_start);
841 bb_end = bb_start + (entry->marked_count + 1);
842
843 if ((entry->disk_ordinal == idx) && (bb_start >= sector) &&
844 (bb_end <= sector + length)) {
845 *pos = i;
846 return 1;
847 }
848 }
849 return 0;
850 }
851
852 /* record new bad block in bbm log */
853 static int record_new_badblock(struct bbm_log *log, const __u8 idx, unsigned
854 long long sector, int length)
855 {
856 int new_bb = 0;
857 __u32 pos = 0;
858 struct bbm_log_entry *entry = NULL;
859
860 while (is_stored_in_bbm(log, idx, sector, length, &pos)) {
861 struct bbm_log_entry *e = &log->marked_block_entries[pos];
862
863 if ((e->marked_count + 1 == BBM_LOG_MAX_LBA_ENTRY_VAL) &&
864 (__le48_to_cpu(&e->defective_block_start) == sector)) {
865 sector += BBM_LOG_MAX_LBA_ENTRY_VAL;
866 length -= BBM_LOG_MAX_LBA_ENTRY_VAL;
867 pos = pos + 1;
868 continue;
869 }
870 entry = e;
871 break;
872 }
873
874 if (entry) {
875 int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
876 BBM_LOG_MAX_LBA_ENTRY_VAL;
877 entry->defective_block_start = __cpu_to_le48(sector);
878 entry->marked_count = cnt - 1;
879 if (cnt == length)
880 return 1;
881 sector += cnt;
882 length -= cnt;
883 }
884
885 new_bb = ROUND_UP(length, BBM_LOG_MAX_LBA_ENTRY_VAL) /
886 BBM_LOG_MAX_LBA_ENTRY_VAL;
887 if (log->entry_count + new_bb > BBM_LOG_MAX_ENTRIES)
888 return 0;
889
890 while (length > 0) {
891 int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
892 BBM_LOG_MAX_LBA_ENTRY_VAL;
893 struct bbm_log_entry *entry =
894 &log->marked_block_entries[log->entry_count];
895
896 entry->defective_block_start = __cpu_to_le48(sector);
897 entry->marked_count = cnt - 1;
898 entry->disk_ordinal = idx;
899
900 sector += cnt;
901 length -= cnt;
902
903 log->entry_count++;
904 }
905
906 return new_bb;
907 }
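
/* Usage sketch (assumed caller, for illustration only): record a run of bad
 * sectors for a disk ordinal, then confirm the range can be found again;
 * runs longer than BBM_LOG_MAX_LBA_ENTRY_VAL are split across entries.
 */
static inline int example_record_and_find(struct bbm_log *log)
{
	__u32 pos = 0;

	if (!record_new_badblock(log, 0 /* ordinal */, 1024 /* sector */, 8))
		return 0;	/* log is full */
	return is_stored_in_bbm(log, 0, 1024, 8, &pos);
}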
908
909 /* clear all bad blocks for given disk */
910 static void clear_disk_badblocks(struct bbm_log *log, const __u8 idx)
911 {
912 __u32 i = 0;
913
914 while (i < log->entry_count) {
915 struct bbm_log_entry *entries = log->marked_block_entries;
916
917 if (entries[i].disk_ordinal == idx) {
918 if (i < log->entry_count - 1)
919 entries[i] = entries[log->entry_count - 1];
920 log->entry_count--;
921 } else {
922 i++;
923 }
924 }
925 }
926
927 /* clear given bad block */
928 static int clear_badblock(struct bbm_log *log, const __u8 idx, const unsigned
929 long long sector, const int length) {
930 __u32 i = 0;
931
932 while (i < log->entry_count) {
933 struct bbm_log_entry *entries = log->marked_block_entries;
934
935 if ((entries[i].disk_ordinal == idx) &&
936 (__le48_to_cpu(&entries[i].defective_block_start) ==
937 sector) && (entries[i].marked_count + 1 == length)) {
938 if (i < log->entry_count - 1)
939 entries[i] = entries[log->entry_count - 1];
940 log->entry_count--;
941 break;
942 }
943 i++;
944 }
945
946 return 1;
947 }
948 #endif /* MDASSEMBLE */
949
950 /* allocate and load BBM log from metadata */
951 static int load_bbm_log(struct intel_super *super)
952 {
953 struct imsm_super *mpb = super->anchor;
954 __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
955
956 super->bbm_log = xcalloc(1, sizeof(struct bbm_log));
957 if (!super->bbm_log)
958 return 1;
959
960 if (bbm_log_size) {
961 struct bbm_log *log = (void *)mpb +
962 __le32_to_cpu(mpb->mpb_size) - bbm_log_size;
963
964 __u32 entry_count;
965
966 if (bbm_log_size < sizeof(log->signature) +
967 sizeof(log->entry_count))
968 return 2;
969
970 entry_count = __le32_to_cpu(log->entry_count);
971 if ((__le32_to_cpu(log->signature) != BBM_LOG_SIGNATURE) ||
972 (entry_count > BBM_LOG_MAX_ENTRIES))
973 return 3;
974
975 if (bbm_log_size !=
976 sizeof(log->signature) + sizeof(log->entry_count) +
977 entry_count * sizeof(struct bbm_log_entry))
978 return 4;
979
980 memcpy(super->bbm_log, log, bbm_log_size);
981 } else {
982 super->bbm_log->signature = __cpu_to_le32(BBM_LOG_SIGNATURE);
983 super->bbm_log->entry_count = 0;
984 }
985
986 return 0;
987 }
988
989 /* checks if bad block is within volume boundaries */
990 static int is_bad_block_in_volume(const struct bbm_log_entry *entry,
991 const unsigned long long start_sector,
992 const unsigned long long size)
993 {
994 unsigned long long bb_start;
995 unsigned long long bb_end;
996
997 bb_start = __le48_to_cpu(&entry->defective_block_start);
998 bb_end = bb_start + (entry->marked_count + 1);
999
1000 if (((bb_start >= start_sector) && (bb_start < start_sector + size)) ||
1001 ((bb_end >= start_sector) && (bb_end <= start_sector + size)))
1002 return 1;
1003
1004 return 0;
1005 }
1006
1007 /* get list of bad blocks on a drive for a volume */
1008 static void get_volume_badblocks(const struct bbm_log *log, const __u8 idx,
1009 const unsigned long long start_sector,
1010 const unsigned long long size,
1011 struct md_bb *bbs)
1012 {
1013 __u32 count = 0;
1014 __u32 i;
1015
1016 for (i = 0; i < log->entry_count; i++) {
1017 const struct bbm_log_entry *ent =
1018 &log->marked_block_entries[i];
1019 struct md_bb_entry *bb;
1020
1021 if ((ent->disk_ordinal == idx) &&
1022 is_bad_block_in_volume(ent, start_sector, size)) {
1023
1024 if (!bbs->entries) {
1025 bbs->entries = xmalloc(BBM_LOG_MAX_ENTRIES *
1026 sizeof(*bb));
1027 if (!bbs->entries)
1028 break;
1029 }
1030
1031 bb = &bbs->entries[count++];
1032 bb->sector = __le48_to_cpu(&ent->defective_block_start);
1033 bb->length = ent->marked_count + 1;
1034 }
1035 }
1036 bbs->count = count;
1037 }
1038
1039 /*
1040 * for second_map:
1041 * == MAP_0 get first map
1042 * == MAP_1 get second map
 1043  * == MAP_X then get map according to the current migr_state
1044 */
1045 static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
1046 int slot,
1047 int second_map)
1048 {
1049 struct imsm_map *map;
1050
1051 map = get_imsm_map(dev, second_map);
1052
1053 /* top byte identifies disk under rebuild */
1054 return __le32_to_cpu(map->disk_ord_tbl[slot]);
1055 }
1056
1057 #define ord_to_idx(ord) (((ord) << 8) >> 8)
1058 static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
1059 {
1060 __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);
1061
1062 return ord_to_idx(ord);
1063 }
1064
1065 static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
1066 {
1067 map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
1068 }
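
/* Sketch: an ord table entry packs the disk index in the low 24 bits and
 * flags such as IMSM_ORD_REBUILD in the top byte. A hypothetical decode:
 */
static inline void example_decode_ord(__u32 ord, int *idx, int *rebuilding)
{
	*idx = ord_to_idx(ord);
	*rebuilding = (ord & IMSM_ORD_REBUILD) != 0;
}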
1069
1070 static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
1071 {
1072 int slot;
1073 __u32 ord;
1074
1075 for (slot = 0; slot < map->num_members; slot++) {
1076 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
1077 if (ord_to_idx(ord) == idx)
1078 return slot;
1079 }
1080
1081 return -1;
1082 }
1083
1084 static int get_imsm_raid_level(struct imsm_map *map)
1085 {
1086 if (map->raid_level == 1) {
1087 if (map->num_members == 2)
1088 return 1;
1089 else
1090 return 10;
1091 }
1092
1093 return map->raid_level;
1094 }
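
/* Note (sketch): IMSM reuses raid_level 1 for RAID10 and disambiguates by
 * member count, so a hypothetical RAID10 predicate reduces to:
 */
static inline int example_is_raid10(struct imsm_map *map)
{
	return map->raid_level == 1 && map->num_members != 2;
}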
1095
1096 static int cmp_extent(const void *av, const void *bv)
1097 {
1098 const struct extent *a = av;
1099 const struct extent *b = bv;
1100 if (a->start < b->start)
1101 return -1;
1102 if (a->start > b->start)
1103 return 1;
1104 return 0;
1105 }
1106
1107 static int count_memberships(struct dl *dl, struct intel_super *super)
1108 {
1109 int memberships = 0;
1110 int i;
1111
1112 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1113 struct imsm_dev *dev = get_imsm_dev(super, i);
1114 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1115
1116 if (get_imsm_disk_slot(map, dl->index) >= 0)
1117 memberships++;
1118 }
1119
1120 return memberships;
1121 }
1122
1123 static __u32 imsm_min_reserved_sectors(struct intel_super *super);
1124
1125 static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi)
1126 {
1127 if (lo == 0 || hi == 0)
1128 return 1;
1129 *lo = __le32_to_cpu((unsigned)n);
1130 *hi = __le32_to_cpu((unsigned)(n >> 32));
1131 return 0;
1132 }
1133
1134 static unsigned long long join_u32(__u32 lo, __u32 hi)
1135 {
1136 return (unsigned long long)__le32_to_cpu(lo) |
1137 (((unsigned long long)__le32_to_cpu(hi)) << 32);
1138 }
1139
1140 static unsigned long long total_blocks(struct imsm_disk *disk)
1141 {
1142 if (disk == NULL)
1143 return 0;
1144 return join_u32(disk->total_blocks_lo, disk->total_blocks_hi);
1145 }
1146
1147 static unsigned long long pba_of_lba0(struct imsm_map *map)
1148 {
1149 if (map == NULL)
1150 return 0;
1151 return join_u32(map->pba_of_lba0_lo, map->pba_of_lba0_hi);
1152 }
1153
1154 static unsigned long long blocks_per_member(struct imsm_map *map)
1155 {
1156 if (map == NULL)
1157 return 0;
1158 return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi);
1159 }
1160
1161 static unsigned long long num_data_stripes(struct imsm_map *map)
1162 {
1163 if (map == NULL)
1164 return 0;
1165 return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi);
1166 }
1167
1168 static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
1169 {
1170 split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
1171 }
1172
1173 static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n)
1174 {
1175 split_ull(n, &map->pba_of_lba0_lo, &map->pba_of_lba0_hi);
1176 }
1177
1178 static void set_blocks_per_member(struct imsm_map *map, unsigned long long n)
1179 {
1180 split_ull(n, &map->blocks_per_member_lo, &map->blocks_per_member_hi);
1181 }
1182
1183 static void set_num_data_stripes(struct imsm_map *map, unsigned long long n)
1184 {
1185 split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi);
1186 }
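
/* Sketch: the lo/hi accessor pairs above round-trip a 64-bit value through
 * two 32-bit little-endian metadata fields. A hypothetical invariant check:
 */
static inline int example_split_join(unsigned long long n)
{
	__u32 lo, hi;

	if (split_ull(n, &lo, &hi))
		return 0;
	return join_u32(lo, hi) == n;
}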
1187
1188 static struct extent *get_extents(struct intel_super *super, struct dl *dl)
1189 {
1190 /* find a list of used extents on the given physical device */
1191 struct extent *rv, *e;
1192 int i;
1193 int memberships = count_memberships(dl, super);
1194 __u32 reservation;
1195
1196 /* trim the reserved area for spares, so they can join any array
1197 * regardless of whether the OROM has assigned sectors from the
1198 * IMSM_RESERVED_SECTORS region
1199 */
1200 if (dl->index == -1)
1201 reservation = imsm_min_reserved_sectors(super);
1202 else
1203 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1204
1205 rv = xcalloc(sizeof(struct extent), (memberships + 1));
1206 e = rv;
1207
1208 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1209 struct imsm_dev *dev = get_imsm_dev(super, i);
1210 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1211
1212 if (get_imsm_disk_slot(map, dl->index) >= 0) {
1213 e->start = pba_of_lba0(map);
1214 e->size = blocks_per_member(map);
1215 e++;
1216 }
1217 }
1218 qsort(rv, memberships, sizeof(*rv), cmp_extent);
1219
1220 /* determine the start of the metadata
1221 * when no raid devices are defined use the default
1222 * ...otherwise allow the metadata to truncate the value
1223 * as is the case with older versions of imsm
1224 */
1225 if (memberships) {
1226 struct extent *last = &rv[memberships - 1];
1227 unsigned long long remainder;
1228
1229 remainder = total_blocks(&dl->disk) - (last->start + last->size);
1230 /* round down to 1k block to satisfy precision of the kernel
1231 * 'size' interface
1232 */
1233 remainder &= ~1UL;
1234 /* make sure remainder is still sane */
1235 if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
1236 remainder = ROUND_UP(super->len, 512) >> 9;
1237 if (reservation > remainder)
1238 reservation = remainder;
1239 }
1240 e->start = total_blocks(&dl->disk) - reservation;
1241 e->size = 0;
1242 return rv;
1243 }
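
/* Usage sketch: the array returned by get_extents() is terminated by a
 * zero-size entry marking the start of the reserved metadata region, so
 * callers scan to that sentinel, as imsm_reserved_sectors() does below.
 * This helper is a hypothetical illustration:
 */
static inline unsigned long long example_metadata_start(struct intel_super *super,
							struct dl *dl)
{
	struct extent *e = get_extents(super, dl);
	unsigned long long start;
	int i;

	if (!e)
		return 0;
	for (i = 0; e[i].size; i++)
		continue;
	start = e[i].start;	/* first block reserved for metadata */
	free(e);
	return start;
}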
1244
1245 /* try to determine how much space is reserved for metadata from
1246 * the last get_extents() entry, otherwise fallback to the
1247 * default
1248 */
1249 static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
1250 {
1251 struct extent *e;
1252 int i;
1253 __u32 rv;
1254
1255 /* for spares just return a minimal reservation which will grow
1256 * once the spare is picked up by an array
1257 */
1258 if (dl->index == -1)
1259 return MPB_SECTOR_CNT;
1260
1261 e = get_extents(super, dl);
1262 if (!e)
1263 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1264
1265 /* scroll to last entry */
1266 for (i = 0; e[i].size; i++)
1267 continue;
1268
1269 rv = total_blocks(&dl->disk) - e[i].start;
1270
1271 free(e);
1272
1273 return rv;
1274 }
1275
1276 static int is_spare(struct imsm_disk *disk)
1277 {
1278 return (disk->status & SPARE_DISK) == SPARE_DISK;
1279 }
1280
1281 static int is_configured(struct imsm_disk *disk)
1282 {
1283 return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
1284 }
1285
1286 static int is_failed(struct imsm_disk *disk)
1287 {
1288 return (disk->status & FAILED_DISK) == FAILED_DISK;
1289 }
1290
1291 /* try to determine how much space is reserved for metadata from
1292 * the last get_extents() entry on the smallest active disk,
1293 * otherwise fallback to the default
1294 */
1295 static __u32 imsm_min_reserved_sectors(struct intel_super *super)
1296 {
1297 struct extent *e;
1298 int i;
1299 unsigned long long min_active;
1300 __u32 remainder;
1301 __u32 rv = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1302 struct dl *dl, *dl_min = NULL;
1303
1304 if (!super)
1305 return rv;
1306
1307 min_active = 0;
1308 for (dl = super->disks; dl; dl = dl->next) {
1309 if (dl->index < 0)
1310 continue;
1311 unsigned long long blocks = total_blocks(&dl->disk);
1312 if (blocks < min_active || min_active == 0) {
1313 dl_min = dl;
1314 min_active = blocks;
1315 }
1316 }
1317 if (!dl_min)
1318 return rv;
1319
1320 /* find last lba used by subarrays on the smallest active disk */
1321 e = get_extents(super, dl_min);
1322 if (!e)
1323 return rv;
1324 for (i = 0; e[i].size; i++)
1325 continue;
1326
1327 remainder = min_active - e[i].start;
1328 free(e);
1329
1330 /* to give priority to recovery we should not require full
1331 IMSM_RESERVED_SECTORS from the spare */
1332 rv = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION;
1333
1334 /* if real reservation is smaller use that value */
1335 return (remainder < rv) ? remainder : rv;
1336 }
1337
 1338 /* Return minimum size of a spare that can be used in this array */
1339 static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
1340 {
1341 struct intel_super *super = st->sb;
1342 struct dl *dl;
1343 struct extent *e;
1344 int i;
1345 unsigned long long rv = 0;
1346
1347 if (!super)
1348 return rv;
1349 /* find first active disk in array */
1350 dl = super->disks;
1351 while (dl && (is_failed(&dl->disk) || dl->index == -1))
1352 dl = dl->next;
1353 if (!dl)
1354 return rv;
1355 /* find last lba used by subarrays */
1356 e = get_extents(super, dl);
1357 if (!e)
1358 return rv;
1359 for (i = 0; e[i].size; i++)
1360 continue;
1361 if (i > 0)
1362 rv = e[i-1].start + e[i-1].size;
1363 free(e);
1364
1365 /* add the amount of space needed for metadata */
1366 rv = rv + imsm_min_reserved_sectors(super);
1367
1368 return rv * 512;
1369 }
1370
1371 static int is_gen_migration(struct imsm_dev *dev);
1372
1373 #define IMSM_4K_DIV 8
1374
1375 #ifndef MDASSEMBLE
1376 static __u64 blocks_per_migr_unit(struct intel_super *super,
1377 struct imsm_dev *dev);
1378
1379 static void print_imsm_dev(struct intel_super *super,
1380 struct imsm_dev *dev,
1381 char *uuid,
1382 int disk_idx)
1383 {
1384 __u64 sz;
1385 int slot, i;
1386 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1387 struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
1388 __u32 ord;
1389
1390 printf("\n");
1391 printf("[%.16s]:\n", dev->volume);
1392 printf(" UUID : %s\n", uuid);
1393 printf(" RAID Level : %d", get_imsm_raid_level(map));
1394 if (map2)
1395 printf(" <-- %d", get_imsm_raid_level(map2));
1396 printf("\n");
1397 printf(" Members : %d", map->num_members);
1398 if (map2)
1399 printf(" <-- %d", map2->num_members);
1400 printf("\n");
1401 printf(" Slots : [");
1402 for (i = 0; i < map->num_members; i++) {
1403 ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
1404 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
1405 }
1406 printf("]");
1407 if (map2) {
1408 printf(" <-- [");
1409 for (i = 0; i < map2->num_members; i++) {
1410 ord = get_imsm_ord_tbl_ent(dev, i, MAP_1);
1411 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
1412 }
1413 printf("]");
1414 }
1415 printf("\n");
1416 printf(" Failed disk : ");
1417 if (map->failed_disk_num == 0xff)
1418 printf("none");
1419 else
1420 printf("%i", map->failed_disk_num);
1421 printf("\n");
1422 slot = get_imsm_disk_slot(map, disk_idx);
1423 if (slot >= 0) {
1424 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
1425 printf(" This Slot : %d%s\n", slot,
1426 ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
1427 } else
1428 printf(" This Slot : ?\n");
1429 sz = __le32_to_cpu(dev->size_high);
1430 sz <<= 32;
1431 sz += __le32_to_cpu(dev->size_low);
1432 printf(" Array Size : %llu%s\n", (unsigned long long)sz,
1433 human_size(sz * 512));
1434 sz = blocks_per_member(map);
1435 printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
1436 human_size(sz * 512));
1437 printf(" Sector Offset : %llu\n",
1438 pba_of_lba0(map));
1439 printf(" Num Stripes : %llu\n",
1440 num_data_stripes(map));
1441 printf(" Chunk Size : %u KiB",
1442 __le16_to_cpu(map->blocks_per_strip) / 2);
1443 if (map2)
1444 printf(" <-- %u KiB",
1445 __le16_to_cpu(map2->blocks_per_strip) / 2);
1446 printf("\n");
1447 printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
1448 printf(" Migrate State : ");
1449 if (dev->vol.migr_state) {
1450 if (migr_type(dev) == MIGR_INIT)
1451 printf("initialize\n");
1452 else if (migr_type(dev) == MIGR_REBUILD)
1453 printf("rebuild\n");
1454 else if (migr_type(dev) == MIGR_VERIFY)
1455 printf("check\n");
1456 else if (migr_type(dev) == MIGR_GEN_MIGR)
1457 printf("general migration\n");
1458 else if (migr_type(dev) == MIGR_STATE_CHANGE)
1459 printf("state change\n");
1460 else if (migr_type(dev) == MIGR_REPAIR)
1461 printf("repair\n");
1462 else
1463 printf("<unknown:%d>\n", migr_type(dev));
1464 } else
1465 printf("idle\n");
1466 printf(" Map State : %s", map_state_str[map->map_state]);
1467 if (dev->vol.migr_state) {
1468 struct imsm_map *map = get_imsm_map(dev, MAP_1);
1469
1470 printf(" <-- %s", map_state_str[map->map_state]);
1471 printf("\n Checkpoint : %u ",
1472 __le32_to_cpu(dev->vol.curr_migr_unit));
1473 if (is_gen_migration(dev) && (slot > 1 || slot < 0))
1474 printf("(N/A)");
1475 else
1476 printf("(%llu)", (unsigned long long)
1477 blocks_per_migr_unit(super, dev));
1478 }
1479 printf("\n");
1480 printf(" Dirty State : %s\n", dev->vol.dirty ? "dirty" : "clean");
1481 }
1482
1483 static void print_imsm_disk(struct imsm_disk *disk,
1484 int index,
1485 __u32 reserved,
1486 unsigned int sector_size) {
1487 char str[MAX_RAID_SERIAL_LEN + 1];
1488 __u64 sz;
1489
1490 if (index < -1 || !disk)
1491 return;
1492
1493 printf("\n");
1494 snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1495 if (index >= 0)
1496 printf(" Disk%02d Serial : %s\n", index, str);
1497 else
1498 printf(" Disk Serial : %s\n", str);
1499 printf(" State :%s%s%s\n", is_spare(disk) ? " spare" : "",
1500 is_configured(disk) ? " active" : "",
1501 is_failed(disk) ? " failed" : "");
1502 printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1503 sz = total_blocks(disk) - reserved;
1504 printf(" Usable Size : %llu%s\n",
1505 (unsigned long long)sz * 512 / sector_size,
1506 human_size(sz * 512));
1507 }
1508
1509 void convert_to_4k_imsm_migr_rec(struct intel_super *super)
1510 {
1511 struct migr_record *migr_rec = super->migr_rec;
1512
1513 migr_rec->blocks_per_unit /= IMSM_4K_DIV;
1514 migr_rec->ckpt_area_pba /= IMSM_4K_DIV;
1515 migr_rec->dest_1st_member_lba /= IMSM_4K_DIV;
1516 migr_rec->dest_depth_per_unit /= IMSM_4K_DIV;
1517 split_ull((join_u32(migr_rec->post_migr_vol_cap,
1518 migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV),
1519 &migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi);
1520 }
1521
1522 void convert_to_4k_imsm_disk(struct imsm_disk *disk)
1523 {
1524 set_total_blocks(disk, (total_blocks(disk)/IMSM_4K_DIV));
1525 }
1526
1527 void convert_to_4k(struct intel_super *super)
1528 {
1529 struct imsm_super *mpb = super->anchor;
1530 struct imsm_disk *disk;
1531 int i;
1532 __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
1533
1534 for (i = 0; i < mpb->num_disks ; i++) {
1535 disk = __get_imsm_disk(mpb, i);
1536 /* disk */
1537 convert_to_4k_imsm_disk(disk);
1538 }
1539 for (i = 0; i < mpb->num_raid_devs; i++) {
1540 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1541 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1542 /* dev */
1543 split_ull((join_u32(dev->size_low, dev->size_high)/IMSM_4K_DIV),
1544 &dev->size_low, &dev->size_high);
1545 dev->vol.curr_migr_unit /= IMSM_4K_DIV;
1546
1547 /* map0 */
1548 set_blocks_per_member(map, blocks_per_member(map)/IMSM_4K_DIV);
1549 map->blocks_per_strip /= IMSM_4K_DIV;
1550 set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);
1551
1552 if (dev->vol.migr_state) {
1553 /* map1 */
1554 map = get_imsm_map(dev, MAP_1);
1555 set_blocks_per_member(map,
1556 blocks_per_member(map)/IMSM_4K_DIV);
1557 map->blocks_per_strip /= IMSM_4K_DIV;
1558 set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);
1559 }
1560 }
1561 if (bbm_log_size) {
1562 struct bbm_log *log = (void *)mpb +
1563 __le32_to_cpu(mpb->mpb_size) - bbm_log_size;
1564 __u32 i;
1565
1566 for (i = 0; i < log->entry_count; i++) {
1567 struct bbm_log_entry *entry =
1568 &log->marked_block_entries[i];
1569
1570 __u8 count = entry->marked_count + 1;
1571 unsigned long long sector =
1572 __le48_to_cpu(&entry->defective_block_start);
1573
1574 entry->defective_block_start =
1575 __cpu_to_le48(sector/IMSM_4K_DIV);
1576 entry->marked_count = max(count/IMSM_4K_DIV, 1) - 1;
1577 }
1578 }
1579
1580 mpb->check_sum = __gen_imsm_checksum(mpb);
1581 }
1582
1583 void examine_migr_rec_imsm(struct intel_super *super)
1584 {
1585 struct migr_record *migr_rec = super->migr_rec;
1586 struct imsm_super *mpb = super->anchor;
1587 int i;
1588
1589 for (i = 0; i < mpb->num_raid_devs; i++) {
1590 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1591 struct imsm_map *map;
1592 int slot = -1;
1593
1594 if (is_gen_migration(dev) == 0)
1595 continue;
1596
1597 printf("\nMigration Record Information:");
1598
1599 /* first map under migration */
1600 map = get_imsm_map(dev, MAP_0);
1601 if (map)
1602 slot = get_imsm_disk_slot(map, super->disks->index);
1603 if (map == NULL || slot > 1 || slot < 0) {
1604 printf(" Empty\n ");
1605 printf("Examine one of first two disks in array\n");
1606 break;
1607 }
1608 printf("\n Status : ");
1609 if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
1610 printf("Normal\n");
1611 else
1612 printf("Contains Data\n");
1613 printf(" Current Unit : %u\n",
1614 __le32_to_cpu(migr_rec->curr_migr_unit));
1615 printf(" Family : %u\n",
1616 __le32_to_cpu(migr_rec->family_num));
1617 printf(" Ascending : %u\n",
1618 __le32_to_cpu(migr_rec->ascending_migr));
1619 printf(" Blocks Per Unit : %u\n",
1620 __le32_to_cpu(migr_rec->blocks_per_unit));
1621 printf(" Dest. Depth Per Unit : %u\n",
1622 __le32_to_cpu(migr_rec->dest_depth_per_unit));
1623 printf(" Checkpoint Area pba : %u\n",
1624 __le32_to_cpu(migr_rec->ckpt_area_pba));
1625 printf(" First member lba : %u\n",
1626 __le32_to_cpu(migr_rec->dest_1st_member_lba));
1627 printf(" Total Number of Units : %u\n",
1628 __le32_to_cpu(migr_rec->num_migr_units));
1629 printf(" Size of volume : %u\n",
1630 __le32_to_cpu(migr_rec->post_migr_vol_cap));
1631 printf(" Expansion space for LBA64 : %u\n",
1632 __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
1633 printf(" Record was read from : %u\n",
1634 __le32_to_cpu(migr_rec->ckpt_read_disk_num));
1635
1636 break;
1637 }
1638 }
1639 #endif /* MDASSEMBLE */
1640
1641 void convert_from_4k_imsm_migr_rec(struct intel_super *super)
1642 {
1643 struct migr_record *migr_rec = super->migr_rec;
1644
1645 migr_rec->blocks_per_unit *= IMSM_4K_DIV;
1646 migr_rec->ckpt_area_pba *= IMSM_4K_DIV;
1647 migr_rec->dest_1st_member_lba *= IMSM_4K_DIV;
1648 migr_rec->dest_depth_per_unit *= IMSM_4K_DIV;
1649 split_ull((join_u32(migr_rec->post_migr_vol_cap,
1650 migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV),
1651 &migr_rec->post_migr_vol_cap,
1652 &migr_rec->post_migr_vol_cap_hi);
1653 }
1654
1655 void convert_from_4k(struct intel_super *super)
1656 {
1657 struct imsm_super *mpb = super->anchor;
1658 struct imsm_disk *disk;
1659 int i;
1660 __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
1661
1662 for (i = 0; i < mpb->num_disks ; i++) {
1663 disk = __get_imsm_disk(mpb, i);
1664 /* disk */
1665 set_total_blocks(disk, (total_blocks(disk)*IMSM_4K_DIV));
1666 }
1667
1668 for (i = 0; i < mpb->num_raid_devs; i++) {
1669 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1670 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1671 /* dev */
1672 split_ull((join_u32(dev->size_low, dev->size_high)*IMSM_4K_DIV),
1673 &dev->size_low, &dev->size_high);
1674 dev->vol.curr_migr_unit *= IMSM_4K_DIV;
1675
1676 /* map0 */
1677 set_blocks_per_member(map, blocks_per_member(map)*IMSM_4K_DIV);
1678 map->blocks_per_strip *= IMSM_4K_DIV;
1679 set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);
1680
1681 if (dev->vol.migr_state) {
1682 /* map1 */
1683 map = get_imsm_map(dev, MAP_1);
1684 set_blocks_per_member(map,
1685 blocks_per_member(map)*IMSM_4K_DIV);
1686 map->blocks_per_strip *= IMSM_4K_DIV;
1687 set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);
1688 }
1689 }
1690 if (bbm_log_size) {
1691 struct bbm_log *log = (void *)mpb +
1692 __le32_to_cpu(mpb->mpb_size) - bbm_log_size;
1693 __u32 i;
1694
1695 for (i = 0; i < log->entry_count; i++) {
1696 struct bbm_log_entry *entry =
1697 &log->marked_block_entries[i];
1698
1699 __u8 count = entry->marked_count + 1;
1700 unsigned long long sector =
1701 __le48_to_cpu(&entry->defective_block_start);
1702
1703 entry->defective_block_start =
1704 __cpu_to_le48(sector*IMSM_4K_DIV);
1705 entry->marked_count = count*IMSM_4K_DIV - 1;
1706 }
1707 }
1708
1709 mpb->check_sum = __gen_imsm_checksum(mpb);
1710 }
1711
 1712 /*******************************************************************************
 1713  * function: imsm_check_attributes
 1714  * Description: Checks whether the features represented by the attribute
 1715  *		flags are supported by mdadm.
 1716  * Parameters:
 1717  *	attributes - attributes read from metadata
 1718  * Returns:
 1719  *	0 - the attributes contain unsupported feature flags
 1720  *	1 - all features are supported
 1721  ******************************************************************************/
1722 static int imsm_check_attributes(__u32 attributes)
1723 {
1724 int ret_val = 1;
 1725 	__u32 not_supported = ~MPB_ATTRIB_SUPPORTED;
1726
1727 not_supported &= ~MPB_ATTRIB_IGNORED;
1728
1729 not_supported &= attributes;
1730 if (not_supported) {
1731 pr_err("(IMSM): Unsupported attributes : %x\n",
1732 (unsigned)__le32_to_cpu(not_supported));
1733 if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
 1734 			dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY\n");
1735 not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
1736 }
1737 if (not_supported & MPB_ATTRIB_2TB) {
1738 dprintf("\t\tMPB_ATTRIB_2TB\n");
1739 not_supported ^= MPB_ATTRIB_2TB;
1740 }
1741 if (not_supported & MPB_ATTRIB_RAID0) {
1742 dprintf("\t\tMPB_ATTRIB_RAID0\n");
1743 not_supported ^= MPB_ATTRIB_RAID0;
1744 }
1745 if (not_supported & MPB_ATTRIB_RAID1) {
1746 dprintf("\t\tMPB_ATTRIB_RAID1\n");
1747 not_supported ^= MPB_ATTRIB_RAID1;
1748 }
1749 if (not_supported & MPB_ATTRIB_RAID10) {
1750 dprintf("\t\tMPB_ATTRIB_RAID10\n");
1751 not_supported ^= MPB_ATTRIB_RAID10;
1752 }
1753 if (not_supported & MPB_ATTRIB_RAID1E) {
1754 dprintf("\t\tMPB_ATTRIB_RAID1E\n");
1755 not_supported ^= MPB_ATTRIB_RAID1E;
1756 }
1757 if (not_supported & MPB_ATTRIB_RAID5) {
1758 dprintf("\t\tMPB_ATTRIB_RAID5\n");
1759 not_supported ^= MPB_ATTRIB_RAID5;
1760 }
1761 if (not_supported & MPB_ATTRIB_RAIDCNG) {
1762 dprintf("\t\tMPB_ATTRIB_RAIDCNG\n");
1763 not_supported ^= MPB_ATTRIB_RAIDCNG;
1764 }
1765 if (not_supported & MPB_ATTRIB_BBM) {
1766 dprintf("\t\tMPB_ATTRIB_BBM\n");
1767 not_supported ^= MPB_ATTRIB_BBM;
1768 }
1769 if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
1770 dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY (== MPB_ATTRIB_LEGACY)\n");
1771 not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
1772 }
1773 if (not_supported & MPB_ATTRIB_EXP_STRIPE_SIZE) {
 1774 			dprintf("\t\tMPB_ATTRIB_EXP_STRIPE_SIZE\n");
1775 not_supported ^= MPB_ATTRIB_EXP_STRIPE_SIZE;
1776 }
1777 if (not_supported & MPB_ATTRIB_2TB_DISK) {
1778 dprintf("\t\tMPB_ATTRIB_2TB_DISK\n");
1779 not_supported ^= MPB_ATTRIB_2TB_DISK;
1780 }
1781 if (not_supported & MPB_ATTRIB_NEVER_USE2) {
1782 dprintf("\t\tMPB_ATTRIB_NEVER_USE2\n");
1783 not_supported ^= MPB_ATTRIB_NEVER_USE2;
1784 }
1785 if (not_supported & MPB_ATTRIB_NEVER_USE) {
1786 dprintf("\t\tMPB_ATTRIB_NEVER_USE\n");
1787 not_supported ^= MPB_ATTRIB_NEVER_USE;
1788 }
1789
1790 if (not_supported)
1791 dprintf("(IMSM): Unknown attributes : %x\n", not_supported);
1792
1793 ret_val = 0;
1794 }
1795
1796 return ret_val;
1797 }
1798
1799 #ifndef MDASSEMBLE
1800 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
1801
1802 static void examine_super_imsm(struct supertype *st, char *homehost)
1803 {
1804 struct intel_super *super = st->sb;
1805 struct imsm_super *mpb = super->anchor;
1806 char str[MAX_SIGNATURE_LENGTH];
1807 int i;
1808 struct mdinfo info;
1809 char nbuf[64];
1810 __u32 sum;
1811 __u32 reserved = imsm_reserved_sectors(super, super->disks);
1812 struct dl *dl;
1813
1814 strncpy(str, (char *)mpb->sig, MPB_SIG_LEN);
1815 str[MPB_SIG_LEN-1] = '\0';
1816 printf(" Magic : %s\n", str);
1817 snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
1818 printf(" Version : %s\n", get_imsm_version(mpb));
1819 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
1820 printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
1821 printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
1822 printf(" Attributes : ");
1823 if (imsm_check_attributes(mpb->attributes))
1824 printf("All supported\n");
1825 else
1826 printf("not supported\n");
1827 getinfo_super_imsm(st, &info, NULL);
1828 fname_from_uuid(st, &info, nbuf, ':');
1829 printf(" UUID : %s\n", nbuf + 5);
1830 sum = __le32_to_cpu(mpb->check_sum);
1831 printf(" Checksum : %08x %s\n", sum,
1832 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
1833 printf(" MPB Sectors : %d\n", mpb_sectors(mpb, super->sector_size));
1834 printf(" Disks : %d\n", mpb->num_disks);
1835 printf(" RAID Devices : %d\n", mpb->num_raid_devs);
1836 print_imsm_disk(__get_imsm_disk(mpb, super->disks->index),
1837 super->disks->index, reserved, super->sector_size);
1838 if (get_imsm_bbm_log_size(super->bbm_log)) {
1839 struct bbm_log *log = super->bbm_log;
1840
1841 printf("\n");
1842 printf("Bad Block Management Log:\n");
1843 printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
1844 printf(" Signature : %x\n", __le32_to_cpu(log->signature));
1845 printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
1846 }
1847 for (i = 0; i < mpb->num_raid_devs; i++) {
1848 struct mdinfo info;
1849 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1850
1851 super->current_vol = i;
1852 getinfo_super_imsm(st, &info, NULL);
1853 fname_from_uuid(st, &info, nbuf, ':');
1854 print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
1855 }
1856 for (i = 0; i < mpb->num_disks; i++) {
1857 if (i == super->disks->index)
1858 continue;
1859 print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved,
1860 super->sector_size);
1861 }
1862
1863 for (dl = super->disks; dl; dl = dl->next)
1864 if (dl->index == -1)
1865 print_imsm_disk(&dl->disk, -1, reserved,
1866 super->sector_size);
1867
1868 examine_migr_rec_imsm(super);
1869 }
1870
1871 static void brief_examine_super_imsm(struct supertype *st, int verbose)
1872 {
1873 /* We just write a generic IMSM ARRAY entry */
1874 struct mdinfo info;
1875 char nbuf[64];
1876 struct intel_super *super = st->sb;
1877
1878 if (!super->anchor->num_raid_devs) {
1879 printf("ARRAY metadata=imsm\n");
1880 return;
1881 }
1882
1883 getinfo_super_imsm(st, &info, NULL);
1884 fname_from_uuid(st, &info, nbuf, ':');
1885 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1886 }
1887
1888 static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1889 {
1890 /* We just write a generic IMSM ARRAY entry */
1891 struct mdinfo info;
1892 char nbuf[64];
1893 char nbuf1[64];
1894 struct intel_super *super = st->sb;
1895 int i;
1896
1897 if (!super->anchor->num_raid_devs)
1898 return;
1899
1900 getinfo_super_imsm(st, &info, NULL);
1901 fname_from_uuid(st, &info, nbuf, ':');
1902 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1903 struct imsm_dev *dev = get_imsm_dev(super, i);
1904
1905 super->current_vol = i;
1906 getinfo_super_imsm(st, &info, NULL);
1907 fname_from_uuid(st, &info, nbuf1, ':');
1908 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1909 dev->volume, nbuf + 5, i, nbuf1 + 5);
1910 }
1911 }
1912
1913 static void export_examine_super_imsm(struct supertype *st)
1914 {
1915 struct intel_super *super = st->sb;
1916 struct imsm_super *mpb = super->anchor;
1917 struct mdinfo info;
1918 char nbuf[64];
1919
1920 getinfo_super_imsm(st, &info, NULL);
1921 fname_from_uuid(st, &info, nbuf, ':');
1922 printf("MD_METADATA=imsm\n");
1923 printf("MD_LEVEL=container\n");
1924 printf("MD_UUID=%s\n", nbuf+5);
1925 printf("MD_DEVICES=%u\n", mpb->num_disks);
1926 }
1927
1928 static int copy_metadata_imsm(struct supertype *st, int from, int to)
1929 {
1930 /* The second-to-last sector of the device contains
1931 * the "struct imsm_super" metadata.
1932 * This contains mpb_size which is the size in bytes of the
1933 * extended metadata. This is located immediately before
1934 * the imsm_super.
1935 * We want to read all that, plus the last sector which
1936 * may contain a migration record, and write it all
1937 * to the target.
1938 */
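/* Sketch of the assumed end-of-device layout:
* | ... data ... | extended mpb | imsm_super | migr record |
* where imsm_super occupies the second-to-last sector and the
* migration record the last one.
*/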
1939 void *buf;
1940 unsigned long long dsize, offset;
1941 int sectors;
1942 struct imsm_super *sb;
1943 struct intel_super *super = st->sb;
1944 unsigned int sector_size = super->sector_size;
1945 unsigned int written = 0;
1946
1947 if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0)
1948 return 1;
1949
1950 if (!get_dev_size(from, NULL, &dsize))
1951 goto err;
1952
1953 if (lseek64(from, dsize-(2*sector_size), 0) < 0)
1954 goto err;
1955 if ((unsigned int)read(from, buf, sector_size) != sector_size)
1956 goto err;
1957 sb = buf;
1958 if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0)
1959 goto err;
1960
1961 sectors = mpb_sectors(sb, sector_size) + 2;
1962 offset = dsize - sectors * sector_size;
1963 if (lseek64(from, offset, 0) < 0 ||
1964 lseek64(to, offset, 0) < 0)
1965 goto err;
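/* copy in chunks of at most 4096 bytes, since 'buf' was allocated
* with MAX_SECTOR_SIZE (4096) bytes above
*/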
1966 while (written < sectors * sector_size) {
1967 int n = sectors*sector_size - written;
1968 if (n > 4096)
1969 n = 4096;
1970 if (read(from, buf, n) != n)
1971 goto err;
1972 if (write(to, buf, n) != n)
1973 goto err;
1974 written += n;
1975 }
1976 free(buf);
1977 return 0;
1978 err:
1979 free(buf);
1980 return 1;
1981 }
1982
1983 static void detail_super_imsm(struct supertype *st, char *homehost)
1984 {
1985 struct mdinfo info;
1986 char nbuf[64];
1987
1988 getinfo_super_imsm(st, &info, NULL);
1989 fname_from_uuid(st, &info, nbuf, ':');
1990 printf("\n UUID : %s\n", nbuf + 5);
1991 }
1992
1993 static void brief_detail_super_imsm(struct supertype *st)
1994 {
1995 struct mdinfo info;
1996 char nbuf[64];
1997 getinfo_super_imsm(st, &info, NULL);
1998 fname_from_uuid(st, &info, nbuf, ':');
1999 printf(" UUID=%s", nbuf + 5);
2000 }
2001
2002 static int imsm_read_serial(int fd, char *devname, __u8 *serial);
2003 static void fd2devname(int fd, char *name);
2004
2005 static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
2006 {
2007 /* dump an unsorted list of devices attached to AHCI Intel storage
2008 * controller, as well as non-connected ports
2009 */
2010 int hba_len = strlen(hba_path) + 1;
2011 struct dirent *ent;
2012 DIR *dir;
2013 char *path = NULL;
2014 int err = 0;
2015 unsigned long port_mask = (1 << port_count) - 1;
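/* port_mask starts with one bit set per expected port; bits are
* cleared below as devices are found, so any bit still set at the
* end marks a port with nothing attached
*/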
2016
2017 if (port_count > (int)sizeof(port_mask) * 8) {
2018 if (verbose > 0)
2019 pr_err("port_count %d out of range\n", port_count);
2020 return 2;
2021 }
2022
2023 /* scroll through /sys/dev/block looking for devices attached to
2024 * this hba
2025 */
2026 dir = opendir("/sys/dev/block");
2027 if (!dir)
2028 return 1;
2029
2030 for (ent = readdir(dir); ent; ent = readdir(dir)) {
2031 int fd;
2032 char model[64];
2033 char vendor[64];
2034 char buf[1024];
2035 int major, minor;
2036 char *device;
2037 char *c;
2038 int port;
2039 int type;
2040
2041 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
2042 continue;
2043 path = devt_to_devpath(makedev(major, minor));
2044 if (!path)
2045 continue;
2046 if (!path_attached_to_hba(path, hba_path)) {
2047 free(path);
2048 path = NULL;
2049 continue;
2050 }
2051
2052 /* retrieve the scsi device type */
2053 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
2054 if (verbose > 0)
2055 pr_err("failed to allocate 'device'\n");
2056 err = 2;
2057 break;
2058 }
2059 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
2060 if (load_sys(device, buf, sizeof(buf)) != 0) {
2061 if (verbose > 0)
2062 pr_err("failed to read device type for %s\n",
2063 path);
2064 err = 2;
2065 free(device);
2066 break;
2067 }
2068 type = strtoul(buf, NULL, 10);
2069
2070 /* if it's not a disk print the vendor and model */
2071 if (!(type == 0 || type == 7 || type == 14)) {
2072 vendor[0] = '\0';
2073 model[0] = '\0';
2074 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
2075 if (load_sys(device, buf, sizeof(buf)) == 0) {
2076 strncpy(vendor, buf, sizeof(vendor));
2077 vendor[sizeof(vendor) - 1] = '\0';
2078 c = (char *) &vendor[sizeof(vendor) - 1];
2079 while (isspace(*c) || *c == '\0')
2080 *c-- = '\0';
2081
2082 }
2083 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
2084 if (load_sys(device, buf, sizeof(buf)) == 0) {
2085 strncpy(model, buf, sizeof(model));
2086 model[sizeof(model) - 1] = '\0';
2087 c = (char *) &model[sizeof(model) - 1];
2088 while (isspace(*c) || *c == '\0')
2089 *c-- = '\0';
2090 }
2091
2092 if (vendor[0] && model[0])
2093 sprintf(buf, "%.64s %.64s", vendor, model);
2094 else
2095 switch (type) { /* numbers from hald/linux/device.c */
2096 case 1: sprintf(buf, "tape"); break;
2097 case 2: sprintf(buf, "printer"); break;
2098 case 3: sprintf(buf, "processor"); break;
2099 case 4:
2100 case 5: sprintf(buf, "cdrom"); break;
2101 case 6: sprintf(buf, "scanner"); break;
2102 case 8: sprintf(buf, "media_changer"); break;
2103 case 9: sprintf(buf, "comm"); break;
2104 case 12: sprintf(buf, "raid"); break;
2105 default: sprintf(buf, "unknown");
2106 }
2107 } else
2108 buf[0] = '\0';
2109 free(device);
2110
2111 /* chop device path to 'host%d' and calculate the port number */
2112 c = strchr(&path[hba_len], '/');
2113 if (!c) {
2114 if (verbose > 0)
2115 pr_err("%s - invalid path name\n", path + hba_len);
2116 err = 2;
2117 break;
2118 }
2119 *c = '\0';
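/* the chopped component is named either "ataN" (libata) or "hostN"
* (scsi); in both cases the port index is taken to be N - host_base
*/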
2120 if ((sscanf(&path[hba_len], "ata%d", &port) == 1) ||
2121 ((sscanf(&path[hba_len], "host%d", &port) == 1)))
2122 port -= host_base;
2123 else {
2124 if (verbose > 0) {
2125 *c = '/'; /* repair the full string */
2126 pr_err("failed to determine port number for %s\n",
2127 path);
2128 }
2129 err = 2;
2130 break;
2131 }
2132
2133 /* mark this port as used */
2134 port_mask &= ~(1 << port);
2135
2136 /* print out the device information */
2137 if (buf[0]) {
2138 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
2139 continue;
2140 }
2141
2142 fd = dev_open(ent->d_name, O_RDONLY);
2143 if (fd < 0)
2144 printf(" Port%d : - disk info unavailable -\n", port);
2145 else {
2146 fd2devname(fd, buf);
2147 printf(" Port%d : %s", port, buf);
2148 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
2149 printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf);
2150 else
2151 printf(" ()\n");
2152 close(fd);
2153 }
2154 free(path);
2155 path = NULL;
2156 }
2157 if (path)
2158 free(path);
2159 if (dir)
2160 closedir(dir);
2161 if (err == 0) {
2162 int i;
2163
2164 for (i = 0; i < port_count; i++)
2165 if (port_mask & (1 << i))
2166 printf(" Port%d : - no device attached -\n", i);
2167 }
2168
2169 return err;
2170 }
2171
2172 static int print_vmd_attached_devs(struct sys_dev *hba)
2173 {
2174 struct dirent *ent;
2175 DIR *dir;
2176 char path[292];
2177 char link[256];
2178 char *c, *rp;
2179
2180 if (hba->type != SYS_DEV_VMD)
2181 return 1;
2182
2183 /* scroll through /sys/bus/pci/drivers/nvme looking for NVMe
2184 * devices attached to this hba
2185 */
2186 dir = opendir("/sys/bus/pci/drivers/nvme");
2187 if (!dir)
2188 return 1;
2189
2190 for (ent = readdir(dir); ent; ent = readdir(dir)) {
2191 int n;
2192
2193 /* is 'ent' a device? check that the 'subsystem' link exists and
2194 * that its target matches 'bus'
2195 */
2196 sprintf(path, "/sys/bus/pci/drivers/nvme/%s/subsystem",
2197 ent->d_name);
2198 n = readlink(path, link, sizeof(link));
2199 if (n < 0 || n >= (int)sizeof(link))
2200 continue;
2201 link[n] = '\0';
2202 c = strrchr(link, '/');
2203 if (!c)
2204 continue;
2205 if (strncmp("pci", c+1, strlen("pci")) != 0)
2206 continue;
2207
2208 sprintf(path, "/sys/bus/pci/drivers/nvme/%s", ent->d_name);
2209
2210 rp = realpath(path, NULL);
2211 if (!rp)
2212 continue;
2213
2214 if (path_attached_to_hba(rp, hba->path)) {
2215 printf(" NVMe under VMD : %s\n", rp);
2216 }
2217 free(rp);
2218 }
2219
2220 closedir(dir);
2221 return 0;
2222 }
2223
2224 static void print_found_intel_controllers(struct sys_dev *elem)
2225 {
2226 for (; elem; elem = elem->next) {
2227 pr_err("found Intel(R) ");
2228 if (elem->type == SYS_DEV_SATA)
2229 fprintf(stderr, "SATA ");
2230 else if (elem->type == SYS_DEV_SAS)
2231 fprintf(stderr, "SAS ");
2232 else if (elem->type == SYS_DEV_NVME)
2233 fprintf(stderr, "NVMe ");
2234
2235 if (elem->type == SYS_DEV_VMD)
2236 fprintf(stderr, "VMD domain");
2237 else
2238 fprintf(stderr, "RAID controller");
2239
2240 if (elem->pci_id)
2241 fprintf(stderr, " at %s", elem->pci_id);
2242 fprintf(stderr, ".\n");
2243 }
2244 fflush(stderr);
2245 }
2246
2247 static int ahci_get_port_count(const char *hba_path, int *port_count)
2248 {
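/* scan hba_path for ataN/hostN entries; returns the lowest host
* number seen (host_base) and stores the span of host numbers in
* *port_count, e.g. ata5..ata10 gives host_base 5, port_count 6
*/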
2249 struct dirent *ent;
2250 DIR *dir;
2251 int host_base = -1;
2252
2253 *port_count = 0;
2254 if ((dir = opendir(hba_path)) == NULL)
2255 return -1;
2256
2257 for (ent = readdir(dir); ent; ent = readdir(dir)) {
2258 int host;
2259
2260 if ((sscanf(ent->d_name, "ata%d", &host) != 1) &&
2261 ((sscanf(ent->d_name, "host%d", &host) != 1)))
2262 continue;
2263 if (*port_count == 0)
2264 host_base = host;
2265 else if (host < host_base)
2266 host_base = host;
2267
2268 if (host + 1 > *port_count + host_base)
2269 *port_count = host + 1 - host_base;
2270 }
2271 closedir(dir);
2272 return host_base;
2273 }
2274
2275 static void print_imsm_capability(const struct imsm_orom *orom)
2276 {
2277 printf(" Platform : Intel(R) ");
2278 if (orom->capabilities == 0 && orom->driver_features == 0)
2279 printf("Matrix Storage Manager\n");
2280 else
2281 printf("Rapid Storage Technology%s\n",
2282 imsm_orom_is_enterprise(orom) ? " enterprise" : "");
2283 if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build)
2284 printf(" Version : %d.%d.%d.%d\n", orom->major_ver,
2285 orom->minor_ver, orom->hotfix_ver, orom->build);
2286 printf(" RAID Levels :%s%s%s%s%s\n",
2287 imsm_orom_has_raid0(orom) ? " raid0" : "",
2288 imsm_orom_has_raid1(orom) ? " raid1" : "",
2289 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
2290 imsm_orom_has_raid10(orom) ? " raid10" : "",
2291 imsm_orom_has_raid5(orom) ? " raid5" : "");
2292 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2293 imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
2294 imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
2295 imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
2296 imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
2297 imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
2298 imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
2299 imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
2300 imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
2301 imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
2302 imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
2303 imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
2304 imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
2305 imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
2306 imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
2307 imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
2308 imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
2309 printf(" 2TB volumes :%s supported\n",
2310 (orom->attr & IMSM_OROM_ATTR_2TB)?"":" not");
2311 printf(" 2TB disks :%s supported\n",
2312 (orom->attr & IMSM_OROM_ATTR_2TB_DISK)?"":" not");
2313 printf(" Max Disks : %d\n", orom->tds);
2314 printf(" Max Volumes : %d per array, %d per %s\n",
2315 orom->vpa, orom->vphba,
2316 imsm_orom_is_nvme(orom) ? "platform" : "controller");
2318 }
2319
2320 static void print_imsm_capability_export(const struct imsm_orom *orom)
2321 {
2322 printf("MD_FIRMWARE_TYPE=imsm\n");
2323 if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build)
2324 printf("IMSM_VERSION=%d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
2325 orom->hotfix_ver, orom->build);
2326 printf("IMSM_SUPPORTED_RAID_LEVELS=%s%s%s%s%s\n",
2327 imsm_orom_has_raid0(orom) ? "raid0 " : "",
2328 imsm_orom_has_raid1(orom) ? "raid1 " : "",
2329 imsm_orom_has_raid1e(orom) ? "raid1e " : "",
2330 imsm_orom_has_raid5(orom) ? "raid5 " : "",
2331 imsm_orom_has_raid10(orom) ? "raid10 " : "");
2332 printf("IMSM_SUPPORTED_CHUNK_SIZES=%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2333 imsm_orom_has_chunk(orom, 2) ? "2k " : "",
2334 imsm_orom_has_chunk(orom, 4) ? "4k " : "",
2335 imsm_orom_has_chunk(orom, 8) ? "8k " : "",
2336 imsm_orom_has_chunk(orom, 16) ? "16k " : "",
2337 imsm_orom_has_chunk(orom, 32) ? "32k " : "",
2338 imsm_orom_has_chunk(orom, 64) ? "64k " : "",
2339 imsm_orom_has_chunk(orom, 128) ? "128k " : "",
2340 imsm_orom_has_chunk(orom, 256) ? "256k " : "",
2341 imsm_orom_has_chunk(orom, 512) ? "512k " : "",
2342 imsm_orom_has_chunk(orom, 1024*1) ? "1M " : "",
2343 imsm_orom_has_chunk(orom, 1024*2) ? "2M " : "",
2344 imsm_orom_has_chunk(orom, 1024*4) ? "4M " : "",
2345 imsm_orom_has_chunk(orom, 1024*8) ? "8M " : "",
2346 imsm_orom_has_chunk(orom, 1024*16) ? "16M " : "",
2347 imsm_orom_has_chunk(orom, 1024*32) ? "32M " : "",
2348 imsm_orom_has_chunk(orom, 1024*64) ? "64M " : "");
2349 printf("IMSM_2TB_VOLUMES=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB) ? "yes" : "no");
2350 printf("IMSM_2TB_DISKS=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB_DISK) ? "yes" : "no");
2351 printf("IMSM_MAX_DISKS=%d\n",orom->tds);
2352 printf("IMSM_MAX_VOLUMES_PER_ARRAY=%d\n",orom->vpa);
2353 printf("IMSM_MAX_VOLUMES_PER_CONTROLLER=%d\n",orom->vphba);
2354 }
2355
2356 static int detail_platform_imsm(int verbose, int enumerate_only, char *controller_path)
2357 {
2358 /* There are two components to imsm platform support, the ahci SATA
2359 * controller and the option-rom. To find the SATA controller we
2360 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
2361 * controller with the Intel vendor id is present. This approach
2362 * allows mdadm to leverage the kernel's ahci detection logic, with the
2363 * caveat that if ahci.ko is not loaded mdadm will not be able to
2364 * detect platform raid capabilities. The option-rom resides in a
2365 * platform "Adapter ROM". We scan for its signature to retrieve the
2366 * platform capabilities. If raid support is disabled in the BIOS the
2367 * option-rom capability structure will not be available.
2368 */
2369 struct sys_dev *list, *hba;
2370 int host_base = 0;
2371 int port_count = 0;
2372 int result = 1;
2373
2374 if (enumerate_only) {
2375 if (check_env("IMSM_NO_PLATFORM"))
2376 return 0;
2377 list = find_intel_devices();
2378 if (!list)
2379 return 2;
2380 for (hba = list; hba; hba = hba->next) {
2381 if (find_imsm_capability(hba)) {
2382 result = 0;
2383 break;
2384 }
2385 else
2386 result = 2;
2387 }
2388 return result;
2389 }
2390
2391 list = find_intel_devices();
2392 if (!list) {
2393 if (verbose > 0)
2394 pr_err("no active Intel(R) RAID controller found.\n");
2395 return 2;
2396 } else if (verbose > 0)
2397 print_found_intel_controllers(list);
2398
2399 for (hba = list; hba; hba = hba->next) {
2400 if (controller_path && (compare_paths(hba->path, controller_path) != 0))
2401 continue;
2402 if (!find_imsm_capability(hba)) {
2403 char buf[PATH_MAX];
2404 pr_err("imsm capabilities not found for controller: %s (type %s)\n",
2405 hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path,
2406 get_sys_dev_type(hba->type));
2407 continue;
2408 }
2409 result = 0;
2410 }
2411
2412 if (controller_path && result == 1) {
2413 pr_err("no active Intel(R) RAID controller found under %s\n",
2414 controller_path);
2415 return result;
2416 }
2417
2418 const struct orom_entry *entry;
2419
2420 for (entry = orom_entries; entry; entry = entry->next) {
2421 if (entry->type == SYS_DEV_VMD) {
2422 print_imsm_capability(&entry->orom);
2423 printf(" 3rd party NVMe :%s supported\n",
2424 imsm_orom_has_tpv_support(&entry->orom)?"":" not");
2425 for (hba = list; hba; hba = hba->next) {
2426 if (hba->type == SYS_DEV_VMD) {
2427 char buf[PATH_MAX];
2428 printf(" I/O Controller : %s (%s)\n",
2429 vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type));
2430 if (print_vmd_attached_devs(hba)) {
2431 if (verbose > 0)
2432 pr_err("failed to get devices attached to VMD domain.\n");
2433 result |= 2;
2434 }
2435 }
2436 }
2437 printf("\n");
2438 continue;
2439 }
2440
2441 print_imsm_capability(&entry->orom);
2442 if (entry->type == SYS_DEV_NVME) {
2443 for (hba = list; hba; hba = hba->next) {
2444 if (hba->type == SYS_DEV_NVME)
2445 printf(" NVMe Device : %s\n", hba->path);
2446 }
2447 printf("\n");
2448 continue;
2449 }
2450
2451 struct devid_list *devid;
2452 for (devid = entry->devid_list; devid; devid = devid->next) {
2453 hba = device_by_id(devid->devid);
2454 if (!hba)
2455 continue;
2456
2457 printf(" I/O Controller : %s (%s)\n",
2458 hba->path, get_sys_dev_type(hba->type));
2459 if (hba->type == SYS_DEV_SATA) {
2460 host_base = ahci_get_port_count(hba->path, &port_count);
2461 if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
2462 if (verbose > 0)
2463 pr_err("failed to enumerate ports on SATA controller at %s.\n", hba->pci_id);
2464 result |= 2;
2465 }
2466 }
2467 }
2468 printf("\n");
2469 }
2470
2471 return result;
2472 }
2473
2474 static int export_detail_platform_imsm(int verbose, char *controller_path)
2475 {
2476 struct sys_dev *list, *hba;
2477 int result = 1;
2478
2479 list = find_intel_devices();
2480 if (!list) {
2481 if (verbose > 0)
2482 pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_INTEL_DEVICES\n");
2483 result = 2;
2484 return result;
2485 }
2486
2487 for (hba = list; hba; hba = hba->next) {
2488 if (controller_path && (compare_paths(hba->path,controller_path) != 0))
2489 continue;
2490 if (!find_imsm_capability(hba) && verbose > 0) {
2491 char buf[PATH_MAX];
2492 pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_IMSM_CAPABLE_DEVICE_UNDER_%s\n",
2493 hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path);
2494 }
2495 else
2496 result = 0;
2497 }
2498
2499 const struct orom_entry *entry;
2500
2501 for (entry = orom_entries; entry; entry = entry->next) {
2502 if (entry->type == SYS_DEV_VMD) {
2503 for (hba = list; hba; hba = hba->next)
2504 if (hba->type == SYS_DEV_VMD)
print_imsm_capability_export(&entry->orom);
2505 continue;
2506 }
2507 print_imsm_capability_export(&entry->orom);
2508 }
2509
2510 return result;
2511 }
2512
2513 #endif
2514
2515 static int match_home_imsm(struct supertype *st, char *homehost)
2516 {
2517 /* the imsm metadata format does not specify any host
2518 * identification information. We return -1 since we can never
2519 * confirm nor deny whether a given array is "meant" for this
2520 * host. We rely on compare_super and the 'family_num' fields to
2521 * exclude member disks that do not belong, and we rely on
2522 * mdadm.conf to specify the arrays that should be assembled.
2523 * Auto-assembly may still pick up "foreign" arrays.
2524 */
2525
2526 return -1;
2527 }
2528
2529 static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
2530 {
2531 /* The uuid returned here is used for:
2532 * uuid to put into bitmap file (Create, Grow)
2533 * uuid for backup header when saving critical section (Grow)
2534 * comparing uuids when re-adding a device into an array
2535 * In these cases the uuid required is that of the data-array,
2536 * not the device-set.
2537 * uuid to recognise same set when adding a missing device back
2538 * to an array. This is a uuid for the device-set.
2539 *
2540 * For each of these we can make do with a truncated
2541 * or hashed uuid rather than the original, as long as
2542 * everyone agrees.
2545 */
2546 /* imsm does not track uuids, so we synthesize one using sha1 on
2547 * - the signature (which is constant for all imsm arrays, but no matter)
2548 * - the orig_family_num of the container
2549 * - the index number of the volume
2550 * - the 'serial' number of the volume.
2551 * Hopefully these are all constant.
2552 */
2553 struct intel_super *super = st->sb;
2554
2555 char buf[20];
2556 struct sha1_ctx ctx;
2557 struct imsm_dev *dev = NULL;
2558 __u32 family_num;
2559
2560 /* some mdadm versions failed to set ->orig_family_num, in which
2561 * case fall back to ->family_num. orig_family_num will be
2562 * fixed up with the first metadata update.
2563 */
2564 family_num = super->anchor->orig_family_num;
2565 if (family_num == 0)
2566 family_num = super->anchor->family_num;
2567 sha1_init_ctx(&ctx);
2568 sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
2569 sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
2570 if (super->current_vol >= 0)
2571 dev = get_imsm_dev(super, super->current_vol);
2572 if (dev) {
2573 __u32 vol = super->current_vol;
2574 sha1_process_bytes(&vol, sizeof(vol), &ctx);
2575 sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
2576 }
2577 sha1_finish_ctx(&ctx, buf);
2578 memcpy(uuid, buf, 4*4);
2579 }
2580
2581 #if 0
2582 static void
2583 get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
2584 {
2585 __u8 *v = get_imsm_version(mpb);
2586 __u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
2587 char major[] = { 0, 0, 0 };
2588 char minor[] = { 0 ,0, 0 };
2589 char patch[] = { 0, 0, 0 };
2590 char *ver_parse[] = { major, minor, patch };
2591 int i, j;
2592
2593 i = j = 0;
2594 while (*v != '\0' && v < end) {
2595 if (*v != '.' && j < 2)
2596 ver_parse[i][j++] = *v;
2597 else {
2598 i++;
2599 j = 0;
2600 }
2601 v++;
2602 }
2603
2604 *m = strtol(minor, NULL, 0);
2605 *p = strtol(patch, NULL, 0);
2606 }
2607 #endif
2608
2609 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
2610 {
2611 /* migr_strip_size when repairing or initializing parity */
2612 struct imsm_map *map = get_imsm_map(dev, MAP_0);
2613 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2614
2615 switch (get_imsm_raid_level(map)) {
2616 case 5:
2617 case 10:
2618 return chunk;
2619 default:
2620 return 128*1024 >> 9;
2621 }
2622 }
2623
2624 static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
2625 {
2626 /* migr_strip_size when rebuilding a degraded disk; no idea why
2627 * this is different from migr_strip_blocks_resync(), but it's good
2628 * to be compatible
2629 */
2630 struct imsm_map *map = get_imsm_map(dev, MAP_1);
2631 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2632
2633 switch (get_imsm_raid_level(map)) {
2634 case 1:
2635 case 10:
2636 if (map->num_members % map->num_domains == 0)
2637 return 128*1024 >> 9;
2638 else
2639 return chunk;
2640 case 5:
2641 return max((__u32) 64*1024 >> 9, chunk);
2642 default:
2643 return 128*1024 >> 9;
2644 }
2645 }
2646
2647 static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
2648 {
2649 struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2650 struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2651 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
2652 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
2653
2654 return max((__u32) 1, hi_chunk / lo_chunk);
2655 }
2656
2657 static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
2658 {
2659 struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2660 int level = get_imsm_raid_level(lo);
2661
2662 if (level == 1 || level == 10) {
2663 struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2664
2665 return hi->num_domains;
2666 } else
2667 return num_stripes_per_unit_resync(dev);
2668 }
2669
2670 static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
2671 {
2672 /* named 'imsm_' because raid0, raid1 and raid10
2673 * counter-intuitively have the same number of data disks
2674 */
2675 struct imsm_map *map = get_imsm_map(dev, second_map);
2676
2677 switch (get_imsm_raid_level(map)) {
2678 case 0:
2679 return map->num_members;
2681 case 1:
2682 case 10:
2683 return map->num_members/2;
2684 case 5:
2685 return map->num_members - 1;
2686 default:
2687 dprintf("unsupported raid level\n");
2688 return 0;
2689 }
2690 }
2691
2692 static __u32 parity_segment_depth(struct imsm_dev *dev)
2693 {
2694 struct imsm_map *map = get_imsm_map(dev, MAP_0);
2695 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2696
2697 switch(get_imsm_raid_level(map)) {
2698 case 1:
2699 case 10:
2700 return chunk * map->num_domains;
2701 case 5:
2702 return chunk * map->num_members;
2703 default:
2704 return chunk;
2705 }
2706 }
2707
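/* map a volume-relative data block number to the corresponding
* per-member block offset (a reading inferred from the caller in
* blocks_per_migr_unit(); the metadata itself does not document it)
*/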
2708 static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
2709 {
2710 struct imsm_map *map = get_imsm_map(dev, MAP_1);
2711 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2712 __u32 strip = block / chunk;
2713
2714 switch (get_imsm_raid_level(map)) {
2715 case 1:
2716 case 10: {
2717 __u32 vol_strip = (strip * map->num_domains) + 1;
2718 __u32 vol_stripe = vol_strip / map->num_members;
2719
2720 return vol_stripe * chunk + block % chunk;
2721 } case 5: {
2722 __u32 stripe = strip / (map->num_members - 1);
2723
2724 return stripe * chunk + block % chunk;
2725 }
2726 default:
2727 return 0;
2728 }
2729 }
2730
2731 static __u64 blocks_per_migr_unit(struct intel_super *super,
2732 struct imsm_dev *dev)
2733 {
2734 /* calculate the conversion factor between per member 'blocks'
2735 * (md/{resync,rebuild}_start) and imsm migration units, return
2736 * 0 for the 'not migrating' and 'unsupported migration' cases
2737 */
2738 if (!dev->vol.migr_state)
2739 return 0;
2740
2741 switch (migr_type(dev)) {
2742 case MIGR_GEN_MIGR: {
2743 struct migr_record *migr_rec = super->migr_rec;
2744 return __le32_to_cpu(migr_rec->blocks_per_unit);
2745 }
2746 case MIGR_VERIFY:
2747 case MIGR_REPAIR:
2748 case MIGR_INIT: {
2749 struct imsm_map *map = get_imsm_map(dev, MAP_0);
2750 __u32 stripes_per_unit;
2751 __u32 blocks_per_unit;
2752 __u32 parity_depth;
2753 __u32 migr_chunk;
2754 __u32 block_map;
2755 __u32 block_rel;
2756 __u32 segment;
2757 __u32 stripe;
2758 __u8 disks;
2759
2760 /* yes, this is really the translation of migr_units to
2761 * per-member blocks in the 'resync' case
2762 */
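/* illustrative example (hypothetical volume): a 3-disk raid5 with a
* 128k chunk has blocks_per_strip = 256 sectors, so (with both maps
* using the same chunk) stripes_per_unit = 1, migr_chunk = 256,
* disks = 2, blocks_per_unit = 512, stripe = 512, segment = 1,
* block_rel = 0, parity_depth = 768, block_map = 0, i.e. one
* migration unit spans 768 per-member sectors
*/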
2763 stripes_per_unit = num_stripes_per_unit_resync(dev);
2764 migr_chunk = migr_strip_blocks_resync(dev);
2765 disks = imsm_num_data_members(dev, MAP_0);
2766 blocks_per_unit = stripes_per_unit * migr_chunk * disks;
2767 stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
2768 segment = blocks_per_unit / stripe;
2769 block_rel = blocks_per_unit - segment * stripe;
2770 parity_depth = parity_segment_depth(dev);
2771 block_map = map_migr_block(dev, block_rel);
2772 return block_map + parity_depth * segment;
2773 }
2774 case MIGR_REBUILD: {
2775 __u32 stripes_per_unit;
2776 __u32 migr_chunk;
2777
2778 stripes_per_unit = num_stripes_per_unit_rebuild(dev);
2779 migr_chunk = migr_strip_blocks_rebuild(dev);
2780 return migr_chunk * stripes_per_unit;
2781 }
2782 case MIGR_STATE_CHANGE:
2783 default:
2784 return 0;
2785 }
2786 }
2787
2788 static int imsm_level_to_layout(int level)
2789 {
2790 switch (level) {
2791 case 0:
2792 case 1:
2793 return 0;
2794 case 5:
2795 case 6:
2796 return ALGORITHM_LEFT_ASYMMETRIC;
2797 case 10:
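/* md encodes the raid10 layout as near copies in the low byte
* and far copies in the next, so 0x102 means near=2, far=1
*/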
2798 return 0x102;
2799 }
2800 return UnSet;
2801 }
2802
2803 /*******************************************************************************
2804 * Function: read_imsm_migr_rec
2805 * Description: Function reads imsm migration record from last sector of disk
2806 * Parameters:
2807 * fd : disk descriptor
2808 * super : metadata info
2809 * Returns:
2810 * 0 : success,
2811 * -1 : fail
2812 ******************************************************************************/
2813 static int read_imsm_migr_rec(int fd, struct intel_super *super)
2814 {
2815 int ret_val = -1;
2816 unsigned int sector_size = super->sector_size;
2817 unsigned long long dsize;
2818
2819 get_dev_size(fd, NULL, &dsize);
2820 if (lseek64(fd, dsize - (sector_size*MIGR_REC_SECTOR_POSITION),
2821 SEEK_SET) < 0) {
2822 pr_err("Cannot seek to anchor block: %s\n",
2823 strerror(errno));
2824 goto out;
2825 }
2826 if ((unsigned int)read(fd, super->migr_rec_buf,
2827 MIGR_REC_BUF_SECTORS*sector_size) !=
2828 MIGR_REC_BUF_SECTORS*sector_size) {
2829 pr_err("Cannot read migr record block: %s\n",
2830 strerror(errno));
2831 goto out;
2832 }
2833 ret_val = 0;
2834 if (sector_size == 4096)
2835 convert_from_4k_imsm_migr_rec(super);
2836
2837 out:
2838 return ret_val;
2839 }
2840
2841 static struct imsm_dev *imsm_get_device_during_migration(
2842 struct intel_super *super)
2843 {
2844
2845 struct intel_dev *dv;
2846
2847 for (dv = super->devlist; dv; dv = dv->next) {
2848 if (is_gen_migration(dv->dev))
2849 return dv->dev;
2850 }
2851 return NULL;
2852 }
2853
2854 /*******************************************************************************
2855 * Function: load_imsm_migr_rec
2856 * Description: Function reads imsm migration record (it is stored at the last
2857 * sector of disk)
2858 * Parameters:
2859 * super : imsm internal array info
2860 * info : general array info
2861 * Returns:
2862 * 0 : success
2863 * -1 : fail
2864 * -2 : no migration in progress
2865 ******************************************************************************/
2866 static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
2867 {
2868 struct mdinfo *sd;
2869 struct dl *dl;
2870 char nm[30];
2871 int retval = -1;
2872 int fd = -1;
2873 struct imsm_dev *dev;
2874 struct imsm_map *map;
2875 int slot = -1;
2876
2877 /* find map under migration */
2878 dev = imsm_get_device_during_migration(super);
2879 /* nothing to load, no migration in progress?
2880 */
2881 if (dev == NULL)
2882 return -2;
2883
2884 if (info) {
2885 for (sd = info->devs ; sd ; sd = sd->next) {
2886 /* read only from one of the first two slots */
2887 if ((sd->disk.raid_disk < 0) ||
2888 (sd->disk.raid_disk > 1))
2889 continue;
2890
2891 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
2892 fd = dev_open(nm, O_RDONLY);
2893 if (fd >= 0)
2894 break;
2895 }
2896 }
2897 if (fd < 0) {
2898 map = get_imsm_map(dev, MAP_0);
2899 for (dl = super->disks; dl; dl = dl->next) {
2900 /* skip spare and failed disks
2901 */
2902 if (dl->index < 0)
2903 continue;
2904 /* read only from one of the first two slots */
2905 if (map)
2906 slot = get_imsm_disk_slot(map, dl->index);
2907 if (map == NULL || slot > 1 || slot < 0)
2908 continue;
2909 sprintf(nm, "%d:%d", dl->major, dl->minor);
2910 fd = dev_open(nm, O_RDONLY);
2911 if (fd >= 0)
2912 break;
2913 }
2914 }
2915 if (fd < 0)
2916 goto out;
2917 retval = read_imsm_migr_rec(fd, super);
2918
2919 out:
2920 if (fd >= 0)
2921 close(fd);
2922 return retval;
2923 }
2924
2925 #ifndef MDASSEMBLE
2926 /*******************************************************************************
2927 * function: imsm_create_metadata_checkpoint_update
2928 * Description: It creates update for checkpoint change.
2929 * Parameters:
2930 * super : imsm internal array info
2931 * u : pointer to prepared update
2932 * Returns:
2933 * Update length.
2934 * If length is equal to 0, input pointer u contains no update
2935 ******************************************************************************/
2936 static int imsm_create_metadata_checkpoint_update(
2937 struct intel_super *super,
2938 struct imsm_update_general_migration_checkpoint **u)
2939 {
2940
2941 int update_memory_size = 0;
2942
2943 dprintf("(enter)\n");
2944
2945 if (u == NULL)
2946 return 0;
2947 *u = NULL;
2948
2949 /* size of all update data without anchor */
2950 update_memory_size =
2951 sizeof(struct imsm_update_general_migration_checkpoint);
2952
2953 *u = xcalloc(1, update_memory_size);
2954 if (*u == NULL) {
2955 dprintf("error: cannot get memory\n");
2956 return 0;
2957 }
2958 (*u)->type = update_general_migration_checkpoint;
2959 (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit);
2960 dprintf("prepared for %u\n", (*u)->curr_migr_unit);
2961
2962 return update_memory_size;
2963 }
2964
2965 static void imsm_update_metadata_locally(struct supertype *st,
2966 void *buf, int len);
2967
2968 /*******************************************************************************
2969 * Function: write_imsm_migr_rec
2970 * Description: Function writes imsm migration record
2971 * (at the last sector of disk)
2972 * Parameters:
2973 * super : imsm internal array info
2974 * Returns:
2975 * 0 : success
2976 * -1 : if fail
2977 ******************************************************************************/
2978 static int write_imsm_migr_rec(struct supertype *st)
2979 {
2980 struct intel_super *super = st->sb;
2981 unsigned int sector_size = super->sector_size;
2982 unsigned long long dsize;
2983 char nm[30];
2984 int fd = -1;
2985 int retval = -1;
2986 struct dl *sd;
2987 int len;
2988 struct imsm_update_general_migration_checkpoint *u;
2989 struct imsm_dev *dev;
2990 struct imsm_map *map;
2991
2992 /* find map under migration */
2993 dev = imsm_get_device_during_migration(super);
2994 /* if no migration, write buffer anyway to clear migr_record
2995 * on disk based on first available device
2996 */
2997 if (dev == NULL)
2998 dev = get_imsm_dev(super, super->current_vol < 0 ? 0 :
2999 super->current_vol);
3000
3001 map = get_imsm_map(dev, MAP_0);
3002
3003 if (sector_size == 4096)
3004 convert_to_4k_imsm_migr_rec(super);
3005 for (sd = super->disks ; sd ; sd = sd->next) {
3006 int slot = -1;
3007
3008 /* skip failed and spare devices */
3009 if (sd->index < 0)
3010 continue;
3011 /* write to the first 2 slots only */
3012 if (map)
3013 slot = get_imsm_disk_slot(map, sd->index);
3014 if (map == NULL || slot > 1 || slot < 0)
3015 continue;
3016
3017 sprintf(nm, "%d:%d", sd->major, sd->minor);
3018 fd = dev_open(nm, O_RDWR);
3019 if (fd < 0)
3020 continue;
3021 get_dev_size(fd, NULL, &dsize);
3022 if (lseek64(fd, dsize - (MIGR_REC_SECTOR_POSITION*sector_size),
3023 SEEK_SET) < 0) {
3024 pr_err("Cannot seek to anchor block: %s\n",
3025 strerror(errno));
3026 goto out;
3027 }
3028 if ((unsigned int)write(fd, super->migr_rec_buf,
3029 MIGR_REC_BUF_SECTORS*sector_size) !=
3030 MIGR_REC_BUF_SECTORS*sector_size) {
3031 pr_err("Cannot write migr record block: %s\n",
3032 strerror(errno));
3033 goto out;
3034 }
3035 close(fd);
3036 fd = -1;
3037 }
3038 if (sector_size == 4096)
3039 convert_from_4k_imsm_migr_rec(super);
3040 /* update checkpoint information in metadata */
3041 len = imsm_create_metadata_checkpoint_update(super, &u);
3042 if (len <= 0) {
3043 dprintf("imsm: Cannot prepare update\n");
3044 goto out;
3045 }
3046 /* update metadata locally */
3047 imsm_update_metadata_locally(st, u, len);
3048 /* and possibly remotely */
3049 if (st->update_tail) {
3050 append_metadata_update(st, u, len);
3051 /* during reshape we do all work inside the metadata handler
3052 * manage_reshape(), so the metadata update has to be triggered
3053 * inside it
3054 */
3055 flush_metadata_updates(st);
3056 st->update_tail = &st->updates;
3057 } else
3058 free(u);
3059
3060 retval = 0;
3061 out:
3062 if (fd >= 0)
3063 close(fd);
3064 return retval;
3065 }
3066 #endif /* MDASSEMBLE */
3067
3068 /* spare/missing disk activations are not allowed while the
3069 * array/container performs a reshape operation, because
3070 * all arrays in the container work on the same set of disks
3071 */
3072 int imsm_reshape_blocks_arrays_changes(struct intel_super *super)
3073 {
3074 int rv = 0;
3075 struct intel_dev *i_dev;
3076 struct imsm_dev *dev;
3077
3078 /* check whole container
3079 */
3080 for (i_dev = super->devlist; i_dev; i_dev = i_dev->next) {
3081 dev = i_dev->dev;
3082 if (is_gen_migration(dev)) {
3083 /* No repair during any migration in container
3084 */
3085 rv = 1;
3086 break;
3087 }
3088 }
3089 return rv;
3090 }

3091 static unsigned long long imsm_component_size_aligment_check(int level,
3092 int chunk_size,
3093 unsigned int sector_size,
3094 unsigned long long component_size)
3095 {
3096 unsigned int component_size_alligment;
3097
3098 /* check component size alignment
3099 */
3100 component_size_alligment = component_size % (chunk_size/sector_size);
3101
3102 dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alligment = %u\n",
3103 level, chunk_size, component_size,
3104 component_size_alligment);
3105
3106 if (component_size_alligment && (level != 1) && (level != UnSet)) {
3107 dprintf("imsm: reported component size aligned from %llu ",
3108 component_size);
3109 component_size -= component_size_alligment;
3110 dprintf_cont("to %llu (%i).\n",
3111 component_size, component_size_alligment);
3112 }
3113
3114 return component_size;
3115 }
3116
3117 static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
3118 {
3119 struct intel_super *super = st->sb;
3120 struct migr_record *migr_rec = super->migr_rec;
3121 struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
3122 struct imsm_map *map = get_imsm_map(dev, MAP_0);
3123 struct imsm_map *prev_map = get_imsm_map(dev, MAP_1);
3124 struct imsm_map *map_to_analyse = map;
3125 struct dl *dl;
3126 int map_disks = info->array.raid_disks;
3127
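/* note: map_disks is captured from the caller-supplied info before
* the memset below clears it; it sizes the 'dmap' fill-in at the end
*/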
3128 memset(info, 0, sizeof(*info));
3129 if (prev_map)
3130 map_to_analyse = prev_map;
3131
3132 dl = super->current_disk;
3133
3134 info->container_member = super->current_vol;
3135 info->array.raid_disks = map->num_members;
3136 info->array.level = get_imsm_raid_level(map_to_analyse);
3137 info->array.layout = imsm_level_to_layout(info->array.level);
3138 info->array.md_minor = -1;
3139 info->array.ctime = 0;
3140 info->array.utime = 0;
3141 info->array.chunk_size =
3142 __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
3143 info->array.state = !dev->vol.dirty;
3144 info->custom_array_size = __le32_to_cpu(dev->size_high);
3145 info->custom_array_size <<= 32;
3146 info->custom_array_size |= __le32_to_cpu(dev->size_low);
3147 info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
3148
3149 if (is_gen_migration(dev)) {
3150 info->reshape_active = 1;
3151 info->new_level = get_imsm_raid_level(map);
3152 info->new_layout = imsm_level_to_layout(info->new_level);
3153 info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
3154 info->delta_disks = map->num_members - prev_map->num_members;
3155 if (info->delta_disks) {
3156 /* this needs to be applied to every array
3157 * in the container.
3158 */
3159 info->reshape_active = CONTAINER_RESHAPE;
3160 }
3161 /* The shape information that we give to md might have to be
3162 * modified to cope with md's requirements for reshaping arrays.
3163 * For example, when reshaping a RAID0, md requires it to be
3164 * presented as a degraded RAID4.
3165 * Also if a RAID0 is migrating to a RAID5 we need to specify
3166 * the array as already being RAID5, but the 'before' layout
3167 * is a RAID4-like layout.
3168 */
3169 switch (info->array.level) {
3170 case 0:
3171 switch(info->new_level) {
3172 case 0:
3173 /* conversion is happening as RAID4 */
3174 info->array.level = 4;
3175 info->array.raid_disks += 1;
3176 break;
3177 case 5:
3178 /* conversion is happening as RAID5 */
3179 info->array.level = 5;
3180 info->array.layout = ALGORITHM_PARITY_N;
3181 info->delta_disks -= 1;
3182 break;
3183 default:
3184 /* FIXME error message */
3185 info->array.level = UnSet;
3186 break;
3187 }
3188 break;
3189 }
3190 } else {
3191 info->new_level = UnSet;
3192 info->new_layout = UnSet;
3193 info->new_chunk = info->array.chunk_size;
3194 info->delta_disks = 0;
3195 }
3196
3197 if (dl) {
3198 info->disk.major = dl->major;
3199 info->disk.minor = dl->minor;
3200 info->disk.number = dl->index;
3201 info->disk.raid_disk = get_imsm_disk_slot(map_to_analyse,
3202 dl->index);
3203 }
3204
3205 info->data_offset = pba_of_lba0(map_to_analyse);
3206
3207 if (info->array.level == 5) {
3208 info->component_size = num_data_stripes(map_to_analyse) *
3209 map_to_analyse->blocks_per_strip;
3210 } else {
3211 info->component_size = blocks_per_member(map_to_analyse);
3212 }
3213
3214 info->component_size = imsm_component_size_aligment_check(
3215 info->array.level,
3216 info->array.chunk_size,
3217 super->sector_size,
3218 info->component_size);
3219 info->bb.supported = 1;
3220
3221 memset(info->uuid, 0, sizeof(info->uuid));
3222 info->recovery_start = MaxSector;
3223
3224 info->reshape_progress = 0;
3225 info->resync_start = MaxSector;
3226 if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
3227 dev->vol.dirty) &&
3228 imsm_reshape_blocks_arrays_changes(super) == 0) {
3229 info->resync_start = 0;
3230 }
3231 if (dev->vol.migr_state) {
3232 switch (migr_type(dev)) {
3233 case MIGR_REPAIR:
3234 case MIGR_INIT: {
3235 __u64 blocks_per_unit = blocks_per_migr_unit(super,
3236 dev);
3237 __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
3238
3239 info->resync_start = blocks_per_unit * units;
3240 break;
3241 }
3242 case MIGR_GEN_MIGR: {
3243 __u64 blocks_per_unit = blocks_per_migr_unit(super,
3244 dev);
3245 __u64 units = __le32_to_cpu(migr_rec->curr_migr_unit);
3246 unsigned long long array_blocks;
3247 int used_disks;
3248
3249 if (__le32_to_cpu(migr_rec->ascending_migr) &&
3250 (units <
3251 (__le32_to_cpu(migr_rec->num_migr_units)-1)) &&
3252 (super->migr_rec->rec_status ==
3253 __cpu_to_le32(UNIT_SRC_IN_CP_AREA)))
3254 units++;
3255
3256 info->reshape_progress = blocks_per_unit * units;
3257
3258 dprintf("IMSM: General Migration checkpoint : %llu (%llu) -> read reshape progress : %llu\n",
3259 (unsigned long long)units,
3260 (unsigned long long)blocks_per_unit,
3261 info->reshape_progress);
3262
3263 used_disks = imsm_num_data_members(dev, MAP_1);
3264 if (used_disks > 0) {
3265 array_blocks = blocks_per_member(map) *
3266 used_disks;
3267 /* round array size down to closest MB
3268 */
3269 info->custom_array_size = (array_blocks
3270 >> SECT_PER_MB_SHIFT)
3271 << SECT_PER_MB_SHIFT;
3272 }
3273 }
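/* fall through: the remaining cases take the default handling
* below, which sets resync_start back to MaxSector
*/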
3274 case MIGR_VERIFY:
3275 /* we could emulate the checkpointing of
3276 * 'sync_action=check' migrations, but for now
3277 * we just immediately complete them
3278 */
3279 case MIGR_REBUILD:
3280 /* this is handled by container_content_imsm() */
3281 case MIGR_STATE_CHANGE:
3282 /* FIXME handle other migrations */
3283 default:
3284 /* we are not dirty, so... */
3285 info->resync_start = MaxSector;
3286 }
3287 }
3288
3289 strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
3290 info->name[MAX_RAID_SERIAL_LEN] = 0;
3291
3292 info->array.major_version = -1;
3293 info->array.minor_version = -2;
3294 sprintf(info->text_version, "/%s/%d", st->container_devnm, info->container_member);
3295 info->safe_mode_delay = 4000; /* 4 secs like the Matrix driver */
3296 uuid_from_super_imsm(st, info->uuid);
3297
3298 if (dmap) {
3299 int i, j;
3300 for (i=0; i<map_disks; i++) {
3301 dmap[i] = 0;
3302 if (i < info->array.raid_disks) {
3303 struct imsm_disk *dsk;
3304 j = get_imsm_disk_idx(dev, i, MAP_X);
3305 dsk = get_imsm_disk(super, j);
3306 if (dsk && (dsk->status & CONFIGURED_DISK))
3307 dmap[i] = 1;
3308 }
3309 }
3310 }
3311 }
3312
3313 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
3314 int failed, int look_in_map);
3315
3316 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
3317 int look_in_map);
3318
3319 #ifndef MDASSEMBLE
3320 static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
3321 {
3322 if (is_gen_migration(dev)) {
3323 int failed;
3324 __u8 map_state;
3325 struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
3326
3327 failed = imsm_count_failed(super, dev, MAP_1);
3328 map_state = imsm_check_degraded(super, dev, failed, MAP_1);
3329 if (map2->map_state != map_state) {
3330 map2->map_state = map_state;
3331 super->updates_pending++;
3332 }
3333 }
3334 }
3335 #endif
3336
3337 static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
3338 {
3339 struct dl *d;
3340
3341 for (d = super->missing; d; d = d->next)
3342 if (d->index == index)
3343 return &d->disk;
3344 return NULL;
3345 }
3346
3347 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
3348 {
3349 struct intel_super *super = st->sb;
3350 struct imsm_disk *disk;
3351 int map_disks = info->array.raid_disks;
3352 int max_enough = -1;
3353 int i;
3354 struct imsm_super *mpb;
3355
3356 if (super->current_vol >= 0) {
3357 getinfo_super_imsm_volume(st, info, map);
3358 return;
3359 }
3360 memset(info, 0, sizeof(*info));
3361
3362 /* Set raid_disks to zero so that Assemble will always pull in valid
3363 * spares
3364 */
3365 info->array.raid_disks = 0;
3366 info->array.level = LEVEL_CONTAINER;
3367 info->array.layout = 0;
3368 info->array.md_minor = -1;
3369 info->array.ctime = 0; /* N/A for imsm */
3370 info->array.utime = 0;
3371 info->array.chunk_size = 0;
3372
3373 info->disk.major = 0;
3374 info->disk.minor = 0;
3375 info->disk.raid_disk = -1;
3376 info->reshape_active = 0;
3377 info->array.major_version = -1;
3378 info->array.minor_version = -2;
3379 strcpy(info->text_version, "imsm");
3380 info->safe_mode_delay = 0;
3381 info->disk.number = -1;
3382 info->disk.state = 0;
3383 info->name[0] = 0;
3384 info->recovery_start = MaxSector;
3385 info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
3386 info->bb.supported = 1;
3387
3388 /* do we have all the in-sync disks that we expect? */
3389 mpb = super->anchor;
3390 info->events = __le32_to_cpu(mpb->generation_num);
3391
3392 for (i = 0; i < mpb->num_raid_devs; i++) {
3393 struct imsm_dev *dev = get_imsm_dev(super, i);
3394 int failed, enough, j, missing = 0;
3395 struct imsm_map *map;
3396 __u8 state;
3397
3398 failed = imsm_count_failed(super, dev, MAP_0);
3399 state = imsm_check_degraded(super, dev, failed, MAP_0);
3400 map = get_imsm_map(dev, MAP_0);
3401
3402 /* any newly missing disks?
3403 * (catches single-degraded vs double-degraded)
3404 */
3405 for (j = 0; j < map->num_members; j++) {
3406 __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
3407 __u32 idx = ord_to_idx(ord);
3408
3409 if (!(ord & IMSM_ORD_REBUILD) &&
3410 get_imsm_missing(super, idx)) {
3411 missing = 1;
3412 break;
3413 }
3414 }
3415
3416 if (state == IMSM_T_STATE_FAILED)
3417 enough = -1;
3418 else if (state == IMSM_T_STATE_DEGRADED &&
3419 (state != map->map_state || missing))
3420 enough = 0;
3421 else /* we're normal, or already degraded */
3422 enough = 1;
3423 if (is_gen_migration(dev) && missing) {
3424 /* during general migration we need all the disks
3425 * that the process is running on.
3426 * No new missing disk is allowed.
3427 */
3428 max_enough = -1;
3429 enough = -1;
3430 /* no more checks necessary
3431 */
3432 break;
3433 }
3434 /* in the missing/failed disk case check to see
3435 * if at least one array is runnable
3436 */
3437 max_enough = max(max_enough, enough);
3438 }
3439 dprintf("enough: %d\n", max_enough);
3440 info->container_enough = max_enough;
3441
3442 if (super->disks) {
3443 __u32 reserved = imsm_reserved_sectors(super, super->disks);
3444
3445 disk = &super->disks->disk;
3446 info->data_offset = total_blocks(&super->disks->disk) - reserved;
3447 info->component_size = reserved;
3448 info->disk.state = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
3449 /* we don't change info->disk.raid_disk here because
3450 * this state will be finalized in mdmon after we have
3451 * found the 'most fresh' version of the metadata
3452 */
3453 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
3454 info->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
3455 }
3456
3457 /* only call uuid_from_super_imsm when this disk is part of a populated container,
3458 * ->compare_super may have updated the 'num_raid_devs' field for spares
3459 */
3460 if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
3461 uuid_from_super_imsm(st, info->uuid);
3462 else
3463 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
3464
3465 /* I don't know how to compute 'map' on imsm, so use safe default */
3466 if (map) {
3467 int i;
3468 for (i = 0; i < map_disks; i++)
3469 map[i] = 1;
3470 }
3471
3472 }
3473
3474 /* allocates memory and fills in a disk entry in the mdinfo
3475 * structure for each disk in the array */
3476 struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
3477 {
3478 struct mdinfo *mddev;
3479 struct intel_super *super = st->sb;
3480 struct imsm_disk *disk;
3481 int count = 0;
3482 struct dl *dl;
3483 if (!super || !super->disks)
3484 return NULL;
3485 dl = super->disks;
3486 mddev = xcalloc(1, sizeof(*mddev));
3487 while (dl) {
3488 struct mdinfo *tmp;
3489 disk = &dl->disk;
3490 tmp = xcalloc(1, sizeof(*tmp));
3491 if (mddev->devs)
3492 tmp->next = mddev->devs;
3493 mddev->devs = tmp;
3494 tmp->disk.number = count++;
3495 tmp->disk.major = dl->major;
3496 tmp->disk.minor = dl->minor;
3497 tmp->disk.state = is_configured(disk) ?
3498 (1 << MD_DISK_ACTIVE) : 0;
3499 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
3500 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
3501 tmp->disk.raid_disk = -1;
3502 dl = dl->next;
3503 }
3504 return mddev;
3505 }
3506
3507 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
3508 char *update, char *devname, int verbose,
3509 int uuid_set, char *homehost)
3510 {
3511 /* For 'assemble' and 'force' we need to return non-zero if any
3512 * change was made. For others, the return value is ignored.
3513 * Update options are:
3514 * force-one : This device looks a bit old but needs to be included,
3515 * update age info appropriately.
3516 * assemble: clear any 'faulty' flag to allow this device to
3517 * be assembled.
3518 * force-array: Array is degraded but being forced, mark it clean
3519 * if that will be needed to assemble it.
3520 *
3521 * newdev: not used ????
3522 * grow: Array has gained a new device - this is currently for
3523 * linear only
3524 * resync: mark as dirty so a resync will happen.
3525 * name: update the name - preserving the homehost
3526 * uuid: Change the uuid of the array to match what is given
3527 *
3528 * Following are not relevant for this imsm:
3529 * sparc2.2 : update from old dodgy metadata
3530 * super-minor: change the preferred_minor number
3531 * summaries: update redundant counters.
3532 * homehost: update the recorded homehost
3533 * _reshape_progress: record new reshape_progress position.
3534 */
3535 int rv = 1;
3536 struct intel_super *super = st->sb;
3537 struct imsm_super *mpb;
3538
3539 /* we can only update container info */
3540 if (!super || super->current_vol >= 0 || !super->anchor)
3541 return 1;
3542
3543 mpb = super->anchor;
3544
3545 if (strcmp(update, "uuid") == 0) {
3546 /* We take this to mean that the family_num should be updated.
3547 * However that is much smaller than the uuid so we cannot really
3548 * allow an explicit uuid to be given. And it is hard to reliably
3549 * know if one was.
3550 * So if !uuid_set we know the current uuid is random: we just use
3551 * the first 'int' and copy it to the other 3 positions.
3552 * Otherwise we require the 4 'int's to be the same, as would be the
3553 * case if we were using a random uuid. So an explicit uuid will be
3554 * accepted as long as all four ints are the same... which shouldn't hurt
3555 */
3556 if (!uuid_set) {
3557 info->uuid[1] = info->uuid[2] = info->uuid[3] = info->uuid[0];
3558 rv = 0;
3559 } else {
3560 if (info->uuid[0] != info->uuid[1] ||
3561 info->uuid[1] != info->uuid[2] ||
3562 info->uuid[2] != info->uuid[3])
3563 rv = -1;
3564 else
3565 rv = 0;
3566 }
3567 if (rv == 0)
3568 mpb->orig_family_num = info->uuid[0];
3569 } else if (strcmp(update, "assemble") == 0)
3570 rv = 0;
3571 else
3572 rv = -1;
3573
3574 /* successful update? recompute checksum */
3575 if (rv == 0)
3576 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
3577
3578 return rv;
3579 }
3580
3581 static size_t disks_to_mpb_size(int disks)
3582 {
3583 size_t size;
3584