imsm: PPL support
[thirdparty/mdadm.git] / super-intel.c
1 /*
2 * mdadm - Intel(R) Matrix Storage Manager Support
3 *
4 * Copyright (C) 2002-2008 Intel Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18 */
19
20 #define HAVE_STDINT_H 1
21 #include "mdadm.h"
22 #include "mdmon.h"
23 #include "sha1.h"
24 #include "platform-intel.h"
25 #include <values.h>
26 #include <scsi/sg.h>
27 #include <ctype.h>
28 #include <dirent.h>
29
30 /* MPB == Metadata Parameter Block */
31 #define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
32 #define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
33 #define MPB_VERSION_RAID0 "1.0.00"
34 #define MPB_VERSION_RAID1 "1.1.00"
35 #define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
36 #define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
37 #define MPB_VERSION_RAID5 "1.2.02"
38 #define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
39 #define MPB_VERSION_CNG "1.2.06"
40 #define MPB_VERSION_ATTRIBS "1.3.00"
41 #define MAX_SIGNATURE_LENGTH 32
42 #define MAX_RAID_SERIAL_LEN 16
43
44 /* supports RAID0 */
45 #define MPB_ATTRIB_RAID0 __cpu_to_le32(0x00000001)
46 /* supports RAID1 */
47 #define MPB_ATTRIB_RAID1 __cpu_to_le32(0x00000002)
48 /* supports RAID10 */
49 #define MPB_ATTRIB_RAID10 __cpu_to_le32(0x00000004)
50 /* supports RAID1E */
51 #define MPB_ATTRIB_RAID1E __cpu_to_le32(0x00000008)
52 /* supports RAID5 */
53 #define MPB_ATTRIB_RAID5 __cpu_to_le32(0x00000010)
54 /* supports RAID CNG */
55 #define MPB_ATTRIB_RAIDCNG __cpu_to_le32(0x00000020)
56 /* supports expanded stripe sizes of 256K, 512K and 1MB */
57 #define MPB_ATTRIB_EXP_STRIPE_SIZE __cpu_to_le32(0x00000040)
58
59 /* The OROM Support RST Caching of Volumes */
60 #define MPB_ATTRIB_NVM __cpu_to_le32(0x02000000)
61 /* The OROM supports creating disks greater than 2TB */
62 #define MPB_ATTRIB_2TB_DISK __cpu_to_le32(0x04000000)
63 /* The OROM supports Bad Block Management */
64 #define MPB_ATTRIB_BBM __cpu_to_le32(0x08000000)
65
66 /* The OROM supports NVM caching of volumes */
67 #define MPB_ATTRIB_NEVER_USE2 __cpu_to_le32(0x10000000)
68 /* The OROM supports creating volumes greater than 2TB */
69 #define MPB_ATTRIB_2TB __cpu_to_le32(0x20000000)
70 /* originally for PMP, now it's wasted b/c. Never use this bit! */
71 #define MPB_ATTRIB_NEVER_USE __cpu_to_le32(0x40000000)
72 /* Verify MPB contents against checksum after reading MPB */
73 #define MPB_ATTRIB_CHECKSUM_VERIFY __cpu_to_le32(0x80000000)
74
75 /* Define all supported attributes that have to be accepted by mdadm
76 */
77 #define MPB_ATTRIB_SUPPORTED (MPB_ATTRIB_CHECKSUM_VERIFY | \
78 MPB_ATTRIB_2TB | \
79 MPB_ATTRIB_2TB_DISK | \
80 MPB_ATTRIB_RAID0 | \
81 MPB_ATTRIB_RAID1 | \
82 MPB_ATTRIB_RAID10 | \
83 MPB_ATTRIB_RAID5 | \
84 MPB_ATTRIB_EXP_STRIPE_SIZE | \
85 MPB_ATTRIB_BBM)
86
87 /* Define attributes that are unused but not harmful */
88 #define MPB_ATTRIB_IGNORED (MPB_ATTRIB_NEVER_USE)
89
90 #define MPB_SECTOR_CNT 2210
91 #define IMSM_RESERVED_SECTORS 4096
92 #define NUM_BLOCKS_DIRTY_STRIPE_REGION 2056
93 #define SECT_PER_MB_SHIFT 11
94 #define MAX_SECTOR_SIZE 4096
95
96 /* Disk configuration info. */
97 #define IMSM_MAX_DEVICES 255
/* Per-disk entry in the IMSM metadata anchor.  On-disk format: field
 * order and sizes are fixed by the Intel metadata layout - do not reorder.
 * NOTE(review): the original offset comments were stale; offsets below are
 * recomputed from the field sizes starting at 0xD8.
 */
struct imsm_disk {
	__u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
	__u32 total_blocks_lo;		 /* 0xE8 - 0xEB total blocks lo */
	__u32 scsi_id;			 /* 0xEC - 0xEF scsi ID */
#define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
#define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
#define JOURNAL_DISK    __cpu_to_le32(0x2000000) /* Device marked as Journaling Drive */
	__u32 status;			 /* 0xF0 - 0xF3 flag bits defined above */
	__u32 owner_cfg_num;		 /* 0xF4 - 0xF7 which config 0,1,2... owns this disk */
	__u32 total_blocks_hi;		 /* 0xF8 - 0xFB total blocks hi */
#define IMSM_DISK_FILLERS	3
	__u32 filler[IMSM_DISK_FILLERS]; /* 0xFC - 0x107 MPB_DISK_FILLERS for future expansion */
};
112
113 /* map selector for map management
114 */
115 #define MAP_0 0
116 #define MAP_1 1
117 #define MAP_X -1
118
119 /* RAID map configuration infos. */
/* RAID map: describes how one volume (or one half of a migration) is laid
 * out across member disks.  On-disk format - do not reorder fields.
 */
struct imsm_map {
	__u32 pba_of_lba0_lo;	/* start address of partition */
	__u32 blocks_per_member_lo;/* blocks per member */
	__u32 num_data_stripes_lo;	/* number of data stripes */
	__u16 blocks_per_strip;
	__u8  map_state;	/* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2
#define IMSM_T_STATE_FAILED 3
	__u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5		/* since metadata version 1.2.02 ? */
	__u8  num_members;	/* number of member disks */
	__u8  num_domains;	/* number of parity domains */
	__u8  failed_disk_num;	/* valid only when state is degraded */
	__u8  ddf;
	__u32 pba_of_lba0_hi;	/* hi 32 bits of the corresponding *_lo fields (LBA64) */
	__u32 blocks_per_member_hi;
	__u32 num_data_stripes_hi;
	__u32 filler[4];	/* expansion area */
#define IMSM_ORD_REBUILD (1 << 24)
	__u32 disk_ord_tbl[1];	/* disk_ord_tbl[num_members],
				 * top byte contains some flags
				 */
} __attribute__ ((packed));
147
/* Volume runtime/migration state.  Trailed by one imsm_map, plus a second
 * map when migr_state is set (see get_imsm_map()).  On-disk format.
 */
struct imsm_vol {
	__u32 curr_migr_unit;	/* checkpoint: last completed migration unit */
	__u32 checkpoint_id;	/* id to access curr_migr_unit */
	__u8  migr_state;	/* Normal or Migrating */
#define MIGR_INIT 0
#define MIGR_REBUILD 1
#define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
#define MIGR_GEN_MIGR 3
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
	__u8  migr_type;	/* Initializing, Rebuilding, ... */
#define RAIDVOL_CLEAN 0
#define RAIDVOL_DIRTY 1
#define RAIDVOL_DSRECORD_VALID 2
	__u8  dirty;
	__u8  fs_state;		/* fast-sync state for CnG (0xff == disabled) */
	__u16 verify_errors;	/* number of mismatches */
	__u16 bad_blocks;	/* number of bad blocks during verify */
	__u32 filler[4];
	struct imsm_map map[1];
	/* here comes another one if migr_state */
} __attribute__ ((packed));
170
/* One RAID volume ("RaidDev") in the container.  Variable-sized: ends with
 * an imsm_vol whose trailing map(s) are sized by num_members/migr_state
 * (see sizeof_imsm_dev()).  On-disk format - do not reorder fields.
 */
struct imsm_dev {
	__u8  volume[MAX_RAID_SERIAL_LEN];	/* volume name, not NUL-terminated if full */
	__u32 size_low;		/* volume size in sectors, low 32 bits */
	__u32 size_high;	/* volume size in sectors, high 32 bits */
#define DEV_BOOTABLE		__cpu_to_le32(0x01)
#define DEV_BOOT_DEVICE		__cpu_to_le32(0x02)
#define DEV_READ_COALESCING	__cpu_to_le32(0x04)
#define DEV_WRITE_COALESCING	__cpu_to_le32(0x08)
#define DEV_LAST_SHUTDOWN_DIRTY	__cpu_to_le32(0x10)
#define DEV_HIDDEN_AT_BOOT	__cpu_to_le32(0x20)
#define DEV_CURRENTLY_HIDDEN	__cpu_to_le32(0x40)
#define DEV_VERIFY_AND_FIX	__cpu_to_le32(0x80)
#define DEV_MAP_STATE_UNINIT	__cpu_to_le32(0x100)
#define DEV_NO_AUTO_RECOVERY	__cpu_to_le32(0x200)
#define DEV_CLONE_N_GO		__cpu_to_le32(0x400)
#define DEV_CLONE_MAN_SYNC	__cpu_to_le32(0x800)
#define DEV_CNG_MASTER_DISK_NUM	__cpu_to_le32(0x1000)
	__u32 status;		/* Persistent RaidDev status (flags above) */
	__u32 reserved_blocks;	/* Reserved blocks at beginning of volume */
	__u8  migr_priority;
	__u8  num_sub_vols;
	__u8  tid;
	__u8  cng_master_disk;
	__u16 cache_policy;
	__u8  cng_state;
	__u8  cng_sub_state;
	__u16 my_vol_raid_dev_num; /* Used in Unique volume Id for this RaidDev */

	/* NVM_EN */
	__u8 nv_cache_mode;
	__u8 nv_cache_flags;

	/* Unique Volume Id of the NvCache Volume associated with this volume */
	__u32 nvc_vol_orig_family_num;
	__u16 nvc_vol_raid_dev_num;

#define RWH_OFF 0
#define RWH_DISTRIBUTED 1
#define RWH_JOURNALING_DRIVE 2
	__u8  rwh_policy;	/* Raid Write Hole Policy (PPL support) */
	__u8  jd_serial[MAX_RAID_SERIAL_LEN];	/* Journal Drive serial number */
	__u8  filler1;

#define IMSM_DEV_FILLERS 3
	__u32 filler[IMSM_DEV_FILLERS];
	struct imsm_vol vol;
} __attribute__ ((packed));
218
/* The metadata anchor ("MPB") found at the end of each member disk.
 * Followed in memory/on disk by imsm_dev[num_raid_devs] and the BBM log.
 * On-disk format - do not reorder fields.
 */
struct imsm_super {
	__u8 sig[MAX_SIGNATURE_LENGTH];	/* 0x00 - 0x1F */
	__u32 check_sum;	/* 0x20 - 0x23 MPB Checksum */
	__u32 mpb_size;		/* 0x24 - 0x27 Size of MPB */
	__u32 family_num;	/* 0x28 - 0x2B Checksum from first time this config was written */
	__u32 generation_num;	/* 0x2C - 0x2F Incremented each time this array's MPB is written */
	__u32 error_log_size;	/* 0x30 - 0x33 in bytes */
	__u32 attributes;	/* 0x34 - 0x37 (MPB_ATTRIB_* flags) */
	__u8 num_disks;		/* 0x38 Number of configured disks */
	__u8 num_raid_devs;	/* 0x39 Number of configured volumes */
	__u8 error_log_pos;	/* 0x3A */
	__u8 fill[1];		/* 0x3B */
	__u32 cache_size;	/* 0x3c - 0x40 in mb */
	__u32 orig_family_num;	/* 0x40 - 0x43 original family num */
	__u32 pwr_cycle_count;	/* 0x44 - 0x47 simulated power cycle count for array */
	__u32 bbm_log_size;	/* 0x48 - 0x4B - size of bad Block Mgmt Log in bytes */
#define IMSM_FILLERS 35
	__u32 filler[IMSM_FILLERS];	/* 0x4C - 0xD7 RAID_MPB_FILLERS */
	struct imsm_disk disk[1];	/* 0xD8 diskTbl[numDisks] */
	/* here comes imsm_dev[num_raid_devs] */
	/* here comes BBM logs */
} __attribute__ ((packed));
241
242 #define BBM_LOG_MAX_ENTRIES 254
243 #define BBM_LOG_MAX_LBA_ENTRY_VAL 256 /* Represents 256 LBAs */
244 #define BBM_LOG_SIGNATURE 0xabadb10c
245
/* 48-bit little-endian LBA, split as 16 low bits (w1) + 32 high bits (dw1);
 * see __le48_to_cpu()/__cpu_to_le48().
 */
struct bbm_log_block_addr {
	__u16 w1;
	__u32 dw1;
} __attribute__ ((__packed__));
250
/* One bad-block run: covers marked_count + 1 consecutive sectors starting
 * at defective_block_start on disk disk_ordinal.
 */
struct bbm_log_entry {
	__u8 marked_count;		/* Number of blocks marked - 1 */
	__u8 disk_ordinal;		/* Disk entry within the imsm_super */
	struct bbm_log_block_addr defective_block_start;
} __attribute__ ((__packed__));
256
/* Bad Block Management log appended to the MPB (see load_bbm_log()). */
struct bbm_log {
	__u32 signature;	/* 0xABADB10C */
	__u32 entry_count;	/* number of valid marked_block_entries */
	struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
} __attribute__ ((__packed__));
262
263 #ifndef MDASSEMBLE
264 static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };
265 #endif
266
267 #define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209
268
269 #define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */
270
271 #define MIGR_REC_BUF_SECTORS 1 /* size of migr_record i/o buffer in sectors */
272 #define MIGR_REC_SECTOR_POSITION 1 /* migr_record position offset on disk,
273 * MIGR_REC_BUF_SECTORS <= MIGR_REC_SECTOR_POS
274 */
275
276 #define UNIT_SRC_NORMAL 0 /* Source data for curr_migr_unit must
277 * be recovered using srcMap */
278 #define UNIT_SRC_IN_CP_AREA 1 /* Source data for curr_migr_unit has
279 * already been migrated and must
280 * be recovered from checkpoint area */
281
282 #define PPL_ENTRY_SPACE (128 * 1024) /* Size of the PPL, without the header */
283
/* On-disk checkpoint record for a general migration (reshape), stored at
 * MIGR_REC_SECTOR_POSITION so an interrupted migration can be resumed.
 */
struct migr_record {
	__u32 rec_status;	    /* Status used to determine how to restart
				     * migration in case it aborts
				     * in some fashion */
	__u32 curr_migr_unit;	    /* 0..numMigrUnits-1 */
	__u32 family_num;	    /* Family number of MPB
				     * containing the RaidDev
				     * that is migrating */
	__u32 ascending_migr;	    /* True if migrating in increasing
				     * order of lbas */
	__u32 blocks_per_unit;	    /* Num disk blocks per unit of operation */
	__u32 dest_depth_per_unit;  /* Num member blocks each destMap
				     * member disk
				     * advances per unit-of-operation */
	__u32 ckpt_area_pba;	    /* Pba of first block of ckpt copy area */
	__u32 dest_1st_member_lba;  /* First member lba on first
				     * stripe of destination */
	__u32 num_migr_units;	    /* Total num migration units-of-op */
	__u32 post_migr_vol_cap;    /* Size of volume after
				     * migration completes */
	__u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
	__u32 ckpt_read_disk_num;   /* Which member disk in destSubMap[0] the
				     * migration ckpt record was read from
				     * (for recovered migrations) */
} __attribute__ ((__packed__));
309
/* Scratch list node used while scanning block devices for IMSM members. */
struct md_list {
	/* usage marker:
	 *  1: load metadata
	 *  2: metadata does not match
	 *  4: already checked
	 */
	int   used;		/* bitmask of the markers above */
	char  *devname;
	int   found;
	int   container;	/* non-zero when the device is a container */
	dev_t st_rdev;
	struct md_list *next;
};
323
324 #define pr_vrb(fmt, arg...) (void) (verbose && pr_err(fmt, ##arg))
325
326 static __u8 migr_type(struct imsm_dev *dev)
327 {
328 if (dev->vol.migr_type == MIGR_VERIFY &&
329 dev->status & DEV_VERIFY_AND_FIX)
330 return MIGR_REPAIR;
331 else
332 return dev->vol.migr_type;
333 }
334
335 static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
336 {
337 /* for compatibility with older oroms convert MIGR_REPAIR, into
338 * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
339 */
340 if (migr_type == MIGR_REPAIR) {
341 dev->vol.migr_type = MIGR_VERIFY;
342 dev->status |= DEV_VERIFY_AND_FIX;
343 } else {
344 dev->vol.migr_type = migr_type;
345 dev->status &= ~DEV_VERIFY_AND_FIX;
346 }
347 }
348
349 static unsigned int sector_count(__u32 bytes, unsigned int sector_size)
350 {
351 return ROUND_UP(bytes, sector_size) / sector_size;
352 }
353
/* Number of sectors occupied by the metadata anchor, derived from the
 * little-endian mpb_size field rounded up to whole sectors.
 */
static unsigned int mpb_sectors(struct imsm_super *mpb,
				unsigned int sector_size)
{
	return sector_count(__le32_to_cpu(mpb->mpb_size), sector_size);
}
359
/* In-memory list node pairing a parsed imsm_dev with its anchor index. */
struct intel_dev {
	struct imsm_dev *dev;
	struct intel_dev *next;
	unsigned index;		/* position within the anchor's device table */
};
365
/* One RAID-capable Intel controller discovered on the system. */
struct intel_hba {
	enum sys_dev_type type;	/* SATA/SAS/NVMe/VMD - see _sys_dev_type[] */
	char *path;		/* sysfs device path (owned) */
	char *pci_id;		/* points into 'path' after the last '/' */
	struct intel_hba *next;
};
372
/* Pending disk-management operation for entries on disk_mgmt_list. */
enum action {
	DISK_REMOVE = 1,
	DISK_ADD
};
/* internal (in-memory) representation of IMSM metadata: the anchor buffer
 * plus parsed per-disk and per-volume state, the migration record, the
 * BBM log and the platform (orom) capabilities.
 */
struct intel_super {
	union {
		void *buf; /* O_DIRECT buffer for reading/writing metadata */
		struct imsm_super *anchor; /* immovable parameters */
	};
	union {
		void *migr_rec_buf; /* buffer for I/O operations */
		struct migr_record *migr_rec; /* migration record */
	};
	int clean_migration_record_by_mdmon; /* when reshape is switched to next
		array, it indicates that mdmon is allowed to clean migration
		record */
	size_t len; /* size of the 'buf' allocation */
	size_t extra_space; /* extra space in 'buf' that is not used yet */
	void *next_buf; /* for realloc'ing buf from the manager */
	size_t next_len;
	int updates_pending; /* count of pending updates for mdmon */
	int current_vol; /* index of raid device undergoing creation */
	unsigned long long create_offset; /* common start for 'current_vol' */
	__u32 random; /* random data for seeding new family numbers */
	struct intel_dev *devlist;	/* parsed raid devices */
	unsigned int sector_size; /* sector size of used member drives */
	struct dl {			/* one physical member disk */
		struct dl *next;
		int index;		/* slot in the anchor's disk table */
		__u8 serial[MAX_RAID_SERIAL_LEN];
		int major, minor;
		char *devname;
		struct imsm_disk disk;
		int fd;
		int extent_cnt;
		struct extent *e; /* for determining freespace @ create */
		int raiddisk; /* slot to fill in autolayout */
		enum action action;
	} *disks, *current_disk;
	struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
				      active */
	struct dl *missing; /* disks removed while we weren't looking */
	struct bbm_log *bbm_log;
	struct intel_hba *hba; /* device path of the raid controller for this metadata */
	const struct imsm_orom *orom; /* platform firmware support */
	struct intel_super *next; /* (temp) list for disambiguating family_num */
	struct md_bb bb;	/* memory for get_bad_blocks call */
};
422
/* Disk record plus ownership marker used when disambiguating containers. */
struct intel_disk {
	struct imsm_disk disk;
#define IMSM_UNKNOWN_OWNER (-1)
	int owner;		/* owning config number or IMSM_UNKNOWN_OWNER */
	struct intel_disk *next;
};
429
/* A contiguous run of sectors on a member disk (used for free-space math). */
struct extent {
	unsigned long long start, size;
};
433
/* definitions of reshape process types */
enum imsm_reshape_type {
	CH_TAKEOVER,	/* level change via takeover (e.g. RAID10 <-> RAID0) */
	CH_MIGRATION,	/* general migration (disk count/layout/chunk change) */
	CH_ARRAY_SIZE,	/* size-only change */
};
440
/* definition of messages passed to imsm_process_update; each value tags
 * one of the imsm_update_* structs below.
 */
enum imsm_update_type {
	update_activate_spare,
	update_create_array,
	update_kill_array,
	update_rename_array,
	update_add_remove_disk,
	update_reshape_container_disks,
	update_reshape_migration,
	update_takeover,
	update_general_migration_checkpoint,
	update_size_change,
	update_prealloc_badblocks_mem,
};
455
/* Request to activate spare disk 'dl' in slot 'slot' of subarray 'array'. */
struct imsm_update_activate_spare {
	enum imsm_update_type type;
	struct dl *dl;
	int slot;
	int array;
	struct imsm_update_activate_spare *next;
};
463
/* Target geometry for a reshape request (-1/0 fields mean "unchanged",
 * per the callers' conventions).
 */
struct geo_params {
	char devnm[32];
	char *dev_name;
	unsigned long long size;
	int level;
	int layout;
	int chunksize;
	int raid_disks;
};
473
/* Direction of a RAID10 <-> RAID0 takeover. */
enum takeover_direction {
	R10_TO_R0,
	R0_TO_R10
};
/* Request to take over subarray 'subarray' in the given direction. */
struct imsm_update_takeover {
	enum imsm_update_type type;
	int subarray;
	enum takeover_direction direction;
};
483
/* Container-wide disk-count reshape; trailed by the new disks' numbers. */
struct imsm_update_reshape {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};
491
/* Per-array migration reshape (level/layout/chunk change on one subdev);
 * trailed by the new disks' numbers like imsm_update_reshape.
 */
struct imsm_update_reshape_migration {
	enum imsm_update_type type;
	int old_raid_disks;
	int new_raid_disks;
	/* fields for array migration changes
	 */
	int subdev;
	int new_level;
	int new_layout;
	int new_chunksize;

	int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};
505
/* Request to resize subarray 'subdev' to 'new_size'. */
struct imsm_update_size_change {
	enum imsm_update_type type;
	int subdev;
	long long new_size;
};
511
/* Checkpoint advance during a general migration. */
struct imsm_update_general_migration_checkpoint {
	enum imsm_update_type type;
	__u32 curr_migr_unit;
};
516
/* Serial-number entry trailing an imsm_update_create_array message. */
struct disk_info {
	__u8 serial[MAX_RAID_SERIAL_LEN];
};
520
/* Request to create a new raid device at index 'dev_idx'; the variable-
 * sized 'dev' is trailed by disk_info[] (see get_disk_info()).
 */
struct imsm_update_create_array {
	enum imsm_update_type type;
	int dev_idx;
	struct imsm_dev dev;
};
526
/* Request to delete the raid device at index 'dev_idx'. */
struct imsm_update_kill_array {
	enum imsm_update_type type;
	int dev_idx;
};
531
/* Request to rename the raid device at index 'dev_idx'. */
struct imsm_update_rename_array {
	enum imsm_update_type type;
	__u8 name[MAX_RAID_SERIAL_LEN];
	int dev_idx;
};
537
/* Request to process the pending disk_mgmt_list adds/removals. */
struct imsm_update_add_remove_disk {
	enum imsm_update_type type;
};
541
/* Request to preallocate bad-block memory in mdmon context. */
struct imsm_update_prealloc_bb_mem {
	enum imsm_update_type type;
};
545
/* Printable names for enum sys_dev_type, indexed by the enum value. */
static const char *_sys_dev_type[] = {
	[SYS_DEV_UNKNOWN] = "Unknown",
	[SYS_DEV_SAS] = "SAS",
	[SYS_DEV_SATA] = "SATA",
	[SYS_DEV_NVME] = "NVMe",
	[SYS_DEV_VMD] = "VMD"
};
553
554 const char *get_sys_dev_type(enum sys_dev_type type)
555 {
556 if (type >= SYS_DEV_MAX)
557 type = SYS_DEV_UNKNOWN;
558
559 return _sys_dev_type[type];
560 }
561
562 static struct intel_hba * alloc_intel_hba(struct sys_dev *device)
563 {
564 struct intel_hba *result = xmalloc(sizeof(*result));
565
566 result->type = device->type;
567 result->path = xstrdup(device->path);
568 result->next = NULL;
569 if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
570 result->pci_id++;
571
572 return result;
573 }
574
575 static struct intel_hba * find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
576 {
577 struct intel_hba *result;
578
579 for (result = hba; result; result = result->next) {
580 if (result->type == device->type && strcmp(result->path, device->path) == 0)
581 break;
582 }
583 return result;
584 }
585
/* Attach the controller of @device to @super's HBA list.
 * Returns 1 on success (already present, first HBA, or a compatible
 * additional HBA) and 2 when the HBA cannot be mixed with the ones
 * already attached (different type, or different OROM).
 */
static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
{
	struct intel_hba *hba;

	/* check if disk attached to Intel HBA */
	hba = find_intel_hba(super->hba, device);
	if (hba != NULL)
		return 1;
	/* Check if HBA is already attached to super */
	if (super->hba == NULL) {
		super->hba = alloc_intel_hba(device);
		return 1;
	}

	hba = super->hba;
	/* Intel metadata allows for all disks attached to the same type HBA.
	 * Do not support HBA types mixing
	 */
	if (device->type != hba->type)
		return 2;

	/* Multiple same type HBAs can be used if they share the same OROM */
	const struct imsm_orom *device_orom = get_orom_by_device_id(device->dev_id);

	if (device_orom != super->orom)
		return 2;

	/* append the new, compatible HBA at the tail of the list */
	while (hba->next)
		hba = hba->next;

	hba->next = alloc_intel_hba(device);
	return 1;
}
619
620 static struct sys_dev* find_disk_attached_hba(int fd, const char *devname)
621 {
622 struct sys_dev *list, *elem;
623 char *disk_path;
624
625 if ((list = find_intel_devices()) == NULL)
626 return 0;
627
628 if (fd < 0)
629 disk_path = (char *) devname;
630 else
631 disk_path = diskfd_to_devpath(fd);
632
633 if (!disk_path)
634 return 0;
635
636 for (elem = list; elem; elem = elem->next)
637 if (path_attached_to_hba(disk_path, elem->path))
638 return elem;
639
640 if (disk_path != devname)
641 free(disk_path);
642
643 return NULL;
644 }
645
646 static int find_intel_hba_capability(int fd, struct intel_super *super,
647 char *devname);
648
649 static struct supertype *match_metadata_desc_imsm(char *arg)
650 {
651 struct supertype *st;
652
653 if (strcmp(arg, "imsm") != 0 &&
654 strcmp(arg, "default") != 0
655 )
656 return NULL;
657
658 st = xcalloc(1, sizeof(*st));
659 st->ss = &super_imsm;
660 st->max_devs = IMSM_MAX_DEVICES;
661 st->minor_version = 0;
662 st->sb = NULL;
663 return st;
664 }
665
#ifndef MDASSEMBLE
/* Version string within the anchor signature: the bytes that follow the
 * fixed MPB_SIGNATURE prefix in mpb->sig.
 */
static __u8 *get_imsm_version(struct imsm_super *mpb)
{
	return &mpb->sig[MPB_SIG_LEN];
}
#endif
672
673 /* retrieve a disk directly from the anchor when the anchor is known to be
674 * up-to-date, currently only at load time
675 */
676 static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
677 {
678 if (index >= mpb->num_disks)
679 return NULL;
680 return &mpb->disk[index];
681 }
682
683 /* retrieve the disk description based on a index of the disk
684 * in the sub-array
685 */
686 static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
687 {
688 struct dl *d;
689
690 for (d = super->disks; d; d = d->next)
691 if (d->index == index)
692 return d;
693
694 return NULL;
695 }
696 /* retrieve a disk from the parsed metadata */
697 static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
698 {
699 struct dl *dl;
700
701 dl = get_imsm_dl_disk(super, index);
702 if (dl)
703 return &dl->disk;
704
705 return NULL;
706 }
707
708 /* generate a checksum directly from the anchor when the anchor is known to be
709 * up-to-date, currently only at load or write_super after coalescing
710 */
711 static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
712 {
713 __u32 end = mpb->mpb_size / sizeof(end);
714 __u32 *p = (__u32 *) mpb;
715 __u32 sum = 0;
716
717 while (end--) {
718 sum += __le32_to_cpu(*p);
719 p++;
720 }
721
722 return sum - __le32_to_cpu(mpb->check_sum);
723 }
724
/* On-disk size of @map: the fixed header plus one __u32 ordinal per member
 * beyond the single element declared in disk_ord_tbl[1].
 */
static size_t sizeof_imsm_map(struct imsm_map *map)
{
	return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
}
729
730 struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
731 {
732 /* A device can have 2 maps if it is in the middle of a migration.
733 * If second_map is:
734 * MAP_0 - we return the first map
735 * MAP_1 - we return the second map if it exists, else NULL
736 * MAP_X - we return the second map if it exists, else the first
737 */
738 struct imsm_map *map = &dev->vol.map[0];
739 struct imsm_map *map2 = NULL;
740
741 if (dev->vol.migr_state)
742 map2 = (void *)map + sizeof_imsm_map(map);
743
744 switch (second_map) {
745 case MAP_0:
746 break;
747 case MAP_1:
748 map = map2;
749 break;
750 case MAP_X:
751 if (map2)
752 map = map2;
753 break;
754 default:
755 map = NULL;
756 }
757 return map;
758
759 }
760
/* return the size of the device.
 * migr_state increases the returned size if map[0] were to be duplicated
 * (i.e. the caller wants room for a second map even though none exists yet)
 */
static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
{
	/* fixed part minus the declared one-map placeholder, plus the real
	 * size of the first map (which varies with num_members)
	 */
	size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
		      sizeof_imsm_map(get_imsm_map(dev, MAP_0));

	/* migrating means an additional map */
	if (dev->vol.migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, MAP_1));
	else if (migr_state)
		size += sizeof_imsm_map(get_imsm_map(dev, MAP_0));

	return size;
}
777
#ifndef MDASSEMBLE
/* retrieve disk serial number list from a metadata update: the disk_info[]
 * array starts right after the variable-sized 'dev' member of the update
 * (hence: struct size minus the declared dev placeholder plus dev's real size)
 */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
	void *u = update;
	struct disk_info *inf;

	inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
	      sizeof_imsm_dev(&update->dev, 0);

	return inf;
}
#endif
791
792 static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
793 {
794 int offset;
795 int i;
796 void *_mpb = mpb;
797
798 if (index >= mpb->num_raid_devs)
799 return NULL;
800
801 /* devices start after all disks */
802 offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;
803
804 for (i = 0; i <= index; i++)
805 if (i == index)
806 return _mpb + offset;
807 else
808 offset += sizeof_imsm_dev(_mpb + offset, 0);
809
810 return NULL;
811 }
812
813 static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
814 {
815 struct intel_dev *dv;
816
817 if (index >= super->anchor->num_raid_devs)
818 return NULL;
819 for (dv = super->devlist; dv; dv = dv->next)
820 if (dv->index == index)
821 return dv->dev;
822 return NULL;
823 }
824
/* Decode a 48-bit little-endian LBA: dw1 holds the high 32 bits, w1 the
 * low 16 bits.
 */
static inline unsigned long long __le48_to_cpu(const struct bbm_log_block_addr
					       *addr)
{
	return ((((__u64)__le32_to_cpu(addr->dw1)) << 16) |
		__le16_to_cpu(addr->w1));
}
831
/* Encode a sector number as a 48-bit little-endian LBA: low 16 bits into
 * w1, the next 32 bits into dw1 (bits above 48 are discarded).
 */
static inline struct bbm_log_block_addr __cpu_to_le48(unsigned long long sec)
{
	struct bbm_log_block_addr addr;

	addr.w1 = __cpu_to_le16((__u16)(sec & 0xffff));
	addr.dw1 = __cpu_to_le32((__u32)(sec >> 16) & 0xffffffff);
	return addr;
}
840
841 #ifndef MDASSEMBLE
842 /* get size of the bbm log */
843 static __u32 get_imsm_bbm_log_size(struct bbm_log *log)
844 {
845 if (!log || log->entry_count == 0)
846 return 0;
847
848 return sizeof(log->signature) +
849 sizeof(log->entry_count) +
850 log->entry_count * sizeof(struct bbm_log_entry);
851 }
852
/* check if bad block is not partially stored in bbm log.
 * Scans entries from *pos onward for one belonging to disk @idx whose run
 * lies entirely within [sector, sector + length); on a hit *pos is set to
 * that entry's index and 1 is returned, otherwise 0.
 */
static int is_stored_in_bbm(struct bbm_log *log, const __u8 idx, const unsigned
			    long long sector, const int length, __u32 *pos)
{
	__u32 i;

	for (i = *pos; i < log->entry_count; i++) {
		struct bbm_log_entry *entry = &log->marked_block_entries[i];
		unsigned long long bb_start;
		unsigned long long bb_end;

		bb_start = __le48_to_cpu(&entry->defective_block_start);
		/* marked_count stores "blocks - 1" */
		bb_end = bb_start + (entry->marked_count + 1);

		if ((entry->disk_ordinal == idx) && (bb_start >= sector) &&
		    (bb_end <= sector + length)) {
			*pos = i;
			return 1;
		}
	}
	return 0;
}
875
/* record new bad block in bbm log.
 * The range [sector, sector + length) is first merged into an existing
 * entry for disk @idx where possible; the remainder is appended as new
 * entries of at most BBM_LOG_MAX_LBA_ENTRY_VAL blocks each.
 * Returns the number of entries appended, or 0 when the log would
 * overflow BBM_LOG_MAX_ENTRIES (nothing is recorded in that case).
 */
static int record_new_badblock(struct bbm_log *log, const __u8 idx, unsigned
			       long long sector, int length)
{
	int new_bb = 0;
	__u32 pos = 0;
	struct bbm_log_entry *entry = NULL;

	/* look for an existing entry covering part of the range; full
	 * entries starting exactly at 'sector' just advance the range
	 */
	while (is_stored_in_bbm(log, idx, sector, length, &pos)) {
		struct bbm_log_entry *e = &log->marked_block_entries[pos];

		if ((e->marked_count + 1 == BBM_LOG_MAX_LBA_ENTRY_VAL) &&
		    (__le48_to_cpu(&e->defective_block_start) == sector)) {
			sector += BBM_LOG_MAX_LBA_ENTRY_VAL;
			length -= BBM_LOG_MAX_LBA_ENTRY_VAL;
			pos = pos + 1;
			continue;
		}
		entry = e;
		break;
	}

	/* grow a partially-filled existing entry before adding new ones */
	if (entry) {
		int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
			BBM_LOG_MAX_LBA_ENTRY_VAL;
		entry->defective_block_start = __cpu_to_le48(sector);
		entry->marked_count = cnt - 1;
		if (cnt == length)
			return 1;
		sector += cnt;
		length -= cnt;
	}

	/* number of fresh entries needed for what is left */
	new_bb = ROUND_UP(length, BBM_LOG_MAX_LBA_ENTRY_VAL) /
		BBM_LOG_MAX_LBA_ENTRY_VAL;
	if (log->entry_count + new_bb > BBM_LOG_MAX_ENTRIES)
		return 0;

	while (length > 0) {
		int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
			BBM_LOG_MAX_LBA_ENTRY_VAL;
		/* NOTE: shadows the outer 'entry' - intentional, the outer
		 * one is no longer needed here
		 */
		struct bbm_log_entry *entry =
			&log->marked_block_entries[log->entry_count];

		entry->defective_block_start = __cpu_to_le48(sector);
		entry->marked_count = cnt - 1;
		entry->disk_ordinal = idx;

		sector += cnt;
		length -= cnt;

		log->entry_count++;
	}

	return new_bb;
}
932
933 /* clear all bad blocks for given disk */
934 static void clear_disk_badblocks(struct bbm_log *log, const __u8 idx)
935 {
936 __u32 i = 0;
937
938 while (i < log->entry_count) {
939 struct bbm_log_entry *entries = log->marked_block_entries;
940
941 if (entries[i].disk_ordinal == idx) {
942 if (i < log->entry_count - 1)
943 entries[i] = entries[log->entry_count - 1];
944 log->entry_count--;
945 } else {
946 i++;
947 }
948 }
949 }
950
/* clear given bad block: remove the entry for disk @idx that starts at
 * @sector and spans exactly @length blocks (swap-with-last removal).
 * Always returns 1, even when no matching entry was found.
 */
static int clear_badblock(struct bbm_log *log, const __u8 idx, const unsigned
			  long long sector, const int length) {
	__u32 i = 0;

	while (i < log->entry_count) {
		struct bbm_log_entry *entries = log->marked_block_entries;

		if ((entries[i].disk_ordinal == idx) &&
		    (__le48_to_cpu(&entries[i].defective_block_start) ==
		     sector) && (entries[i].marked_count + 1 == length)) {
			if (i < log->entry_count - 1)
				entries[i] = entries[log->entry_count - 1];
			log->entry_count--;
			break;
		}
		i++;
	}

	return 1;
}
972 #endif /* MDASSEMBLE */
973
/* allocate and load BBM log from metadata.
 * The log, when present, occupies the last bbm_log_size bytes of the MPB.
 * Returns 0 on success; 1 on allocation failure (xcalloc aborts in
 * practice, so this is defensive); 2-4 on the validation failures below.
 * The allocation is stored in super->bbm_log in all cases, so the caller's
 * normal teardown path frees it even on error.
 */
static int load_bbm_log(struct intel_super *super)
{
	struct imsm_super *mpb = super->anchor;
	__u32 bbm_log_size =  __le32_to_cpu(mpb->bbm_log_size);

	super->bbm_log = xcalloc(1, sizeof(struct bbm_log));
	if (!super->bbm_log)
		return 1;

	if (bbm_log_size) {
		struct bbm_log *log = (void *)mpb +
			__le32_to_cpu(mpb->mpb_size) - bbm_log_size;

		__u32 entry_count;

		/* must at least hold the header */
		if (bbm_log_size < sizeof(log->signature) +
		    sizeof(log->entry_count))
			return 2;

		entry_count = __le32_to_cpu(log->entry_count);
		if ((__le32_to_cpu(log->signature) != BBM_LOG_SIGNATURE) ||
		    (entry_count > BBM_LOG_MAX_ENTRIES))
			return 3;

		/* declared size must match the entry count exactly */
		if (bbm_log_size !=
		    sizeof(log->signature) + sizeof(log->entry_count) +
		    entry_count * sizeof(struct bbm_log_entry))
			return 4;

		memcpy(super->bbm_log, log, bbm_log_size);
	} else {
		/* no log on disk: start a fresh, empty one */
		super->bbm_log->signature = __cpu_to_le32(BBM_LOG_SIGNATURE);
		super->bbm_log->entry_count = 0;
	}

	return 0;
}
1012
1013 /* checks if bad block is within volume boundaries */
1014 static int is_bad_block_in_volume(const struct bbm_log_entry *entry,
1015 const unsigned long long start_sector,
1016 const unsigned long long size)
1017 {
1018 unsigned long long bb_start;
1019 unsigned long long bb_end;
1020
1021 bb_start = __le48_to_cpu(&entry->defective_block_start);
1022 bb_end = bb_start + (entry->marked_count + 1);
1023
1024 if (((bb_start >= start_sector) && (bb_start < start_sector + size)) ||
1025 ((bb_end >= start_sector) && (bb_end <= start_sector + size)))
1026 return 1;
1027
1028 return 0;
1029 }
1030
/* get list of bad blocks on a drive for a volume: collect into @bbs every
 * log entry for disk @idx that overlaps [start_sector, start_sector+size).
 * bbs->entries is lazily allocated (worst-case size) on the first hit and
 * is owned by the caller; bbs->count is always set.
 */
static void get_volume_badblocks(const struct bbm_log *log, const __u8 idx,
				 const unsigned long long start_sector,
				 const unsigned long long size,
				 struct md_bb *bbs)
{
	__u32 count = 0;
	__u32 i;

	for (i = 0; i < log->entry_count; i++) {
		const struct bbm_log_entry *ent =
			&log->marked_block_entries[i];
		struct md_bb_entry *bb;

		if ((ent->disk_ordinal == idx) &&
		    is_bad_block_in_volume(ent, start_sector, size)) {

			if (!bbs->entries) {
				bbs->entries = xmalloc(BBM_LOG_MAX_ENTRIES *
						       sizeof(*bb));
				/* defensive: xmalloc aborts rather than
				 * returning NULL in practice
				 */
				if (!bbs->entries)
					break;
			}

			bb = &bbs->entries[count++];
			bb->sector = __le48_to_cpu(&ent->defective_block_start);
			bb->length = ent->marked_count + 1;
		}
	}
	bbs->count = count;
}
1062
1063 /*
1064 * for second_map:
1065 * == MAP_0 get first map
1066 * == MAP_1 get second map
1067 * == MAP_X than get map according to the current migr_state
1068 */
1069 static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
1070 int slot,
1071 int second_map)
1072 {
1073 struct imsm_map *map;
1074
1075 map = get_imsm_map(dev, second_map);
1076
1077 /* top byte identifies disk under rebuild */
1078 return __le32_to_cpu(map->disk_ord_tbl[slot]);
1079 }
1080
1081 #define ord_to_idx(ord) (((ord) << 8) >> 8)
1082 static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
1083 {
1084 __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);
1085
1086 return ord_to_idx(ord);
1087 }
1088
/* Store ordinal @ord (index plus flag byte) into the map's ordinal
 * table at @slot, converting to on-disk little-endian order.
 */
static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
	map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}
1093
1094 static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
1095 {
1096 int slot;
1097 __u32 ord;
1098
1099 for (slot = 0; slot < map->num_members; slot++) {
1100 ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
1101 if (ord_to_idx(ord) == idx)
1102 return slot;
1103 }
1104
1105 return -1;
1106 }
1107
1108 static int get_imsm_raid_level(struct imsm_map *map)
1109 {
1110 if (map->raid_level == 1) {
1111 if (map->num_members == 2)
1112 return 1;
1113 else
1114 return 10;
1115 }
1116
1117 return map->raid_level;
1118 }
1119
1120 static int cmp_extent(const void *av, const void *bv)
1121 {
1122 const struct extent *a = av;
1123 const struct extent *b = bv;
1124 if (a->start < b->start)
1125 return -1;
1126 if (a->start > b->start)
1127 return 1;
1128 return 0;
1129 }
1130
1131 static int count_memberships(struct dl *dl, struct intel_super *super)
1132 {
1133 int memberships = 0;
1134 int i;
1135
1136 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1137 struct imsm_dev *dev = get_imsm_dev(super, i);
1138 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1139
1140 if (get_imsm_disk_slot(map, dl->index) >= 0)
1141 memberships++;
1142 }
1143
1144 return memberships;
1145 }
1146
1147 static __u32 imsm_min_reserved_sectors(struct intel_super *super);
1148
1149 static int split_ull(unsigned long long n, __u32 *lo, __u32 *hi)
1150 {
1151 if (lo == 0 || hi == 0)
1152 return 1;
1153 *lo = __le32_to_cpu((unsigned)n);
1154 *hi = __le32_to_cpu((unsigned)(n >> 32));
1155 return 0;
1156 }
1157
1158 static unsigned long long join_u32(__u32 lo, __u32 hi)
1159 {
1160 return (unsigned long long)__le32_to_cpu(lo) |
1161 (((unsigned long long)__le32_to_cpu(hi)) << 32);
1162 }
1163
1164 static unsigned long long total_blocks(struct imsm_disk *disk)
1165 {
1166 if (disk == NULL)
1167 return 0;
1168 return join_u32(disk->total_blocks_lo, disk->total_blocks_hi);
1169 }
1170
1171 static unsigned long long pba_of_lba0(struct imsm_map *map)
1172 {
1173 if (map == NULL)
1174 return 0;
1175 return join_u32(map->pba_of_lba0_lo, map->pba_of_lba0_hi);
1176 }
1177
1178 static unsigned long long blocks_per_member(struct imsm_map *map)
1179 {
1180 if (map == NULL)
1181 return 0;
1182 return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi);
1183 }
1184
1185 static unsigned long long num_data_stripes(struct imsm_map *map)
1186 {
1187 if (map == NULL)
1188 return 0;
1189 return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi);
1190 }
1191
/* Store @n into the disk's split lo/hi total-blocks fields. */
static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
{
	split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
}
1196
/* Store @n into the map's split lo/hi data-start fields. */
static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n)
{
	split_ull(n, &map->pba_of_lba0_lo, &map->pba_of_lba0_hi);
}
1201
/* Store @n into the map's split lo/hi per-member size fields. */
static void set_blocks_per_member(struct imsm_map *map, unsigned long long n)
{
	split_ull(n, &map->blocks_per_member_lo, &map->blocks_per_member_hi);
}
1206
/* Store @n into the map's split lo/hi stripe-count fields. */
static void set_num_data_stripes(struct imsm_map *map, unsigned long long n)
{
	split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi);
}
1211
1212 static struct extent *get_extents(struct intel_super *super, struct dl *dl)
1213 {
1214 /* find a list of used extents on the given physical device */
1215 struct extent *rv, *e;
1216 int i;
1217 int memberships = count_memberships(dl, super);
1218 __u32 reservation;
1219
1220 /* trim the reserved area for spares, so they can join any array
1221 * regardless of whether the OROM has assigned sectors from the
1222 * IMSM_RESERVED_SECTORS region
1223 */
1224 if (dl->index == -1)
1225 reservation = imsm_min_reserved_sectors(super);
1226 else
1227 reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1228
1229 rv = xcalloc(sizeof(struct extent), (memberships + 1));
1230 e = rv;
1231
1232 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1233 struct imsm_dev *dev = get_imsm_dev(super, i);
1234 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1235
1236 if (get_imsm_disk_slot(map, dl->index) >= 0) {
1237 e->start = pba_of_lba0(map);
1238 e->size = blocks_per_member(map);
1239 e++;
1240 }
1241 }
1242 qsort(rv, memberships, sizeof(*rv), cmp_extent);
1243
1244 /* determine the start of the metadata
1245 * when no raid devices are defined use the default
1246 * ...otherwise allow the metadata to truncate the value
1247 * as is the case with older versions of imsm
1248 */
1249 if (memberships) {
1250 struct extent *last = &rv[memberships - 1];
1251 unsigned long long remainder;
1252
1253 remainder = total_blocks(&dl->disk) - (last->start + last->size);
1254 /* round down to 1k block to satisfy precision of the kernel
1255 * 'size' interface
1256 */
1257 remainder &= ~1UL;
1258 /* make sure remainder is still sane */
1259 if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
1260 remainder = ROUND_UP(super->len, 512) >> 9;
1261 if (reservation > remainder)
1262 reservation = remainder;
1263 }
1264 e->start = total_blocks(&dl->disk) - reservation;
1265 e->size = 0;
1266 return rv;
1267 }
1268
1269 /* try to determine how much space is reserved for metadata from
1270 * the last get_extents() entry, otherwise fallback to the
1271 * default
1272 */
1273 static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
1274 {
1275 struct extent *e;
1276 int i;
1277 __u32 rv;
1278
1279 /* for spares just return a minimal reservation which will grow
1280 * once the spare is picked up by an array
1281 */
1282 if (dl->index == -1)
1283 return MPB_SECTOR_CNT;
1284
1285 e = get_extents(super, dl);
1286 if (!e)
1287 return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1288
1289 /* scroll to last entry */
1290 for (i = 0; e[i].size; i++)
1291 continue;
1292
1293 rv = total_blocks(&dl->disk) - e[i].start;
1294
1295 free(e);
1296
1297 return rv;
1298 }
1299
/* True when all SPARE_DISK status bits are set for @disk. */
static int is_spare(struct imsm_disk *disk)
{
	return (disk->status & SPARE_DISK) == SPARE_DISK;
}
1304
/* True when all CONFIGURED_DISK status bits are set for @disk. */
static int is_configured(struct imsm_disk *disk)
{
	return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
}
1309
/* True when all FAILED_DISK status bits are set for @disk. */
static int is_failed(struct imsm_disk *disk)
{
	return (disk->status & FAILED_DISK) == FAILED_DISK;
}
1314
/* True when all JOURNAL_DISK status bits are set for @disk. */
static int is_journal(struct imsm_disk *disk)
{
	return (disk->status & JOURNAL_DISK) == JOURNAL_DISK;
}
1319
1320 /* try to determine how much space is reserved for metadata from
1321 * the last get_extents() entry on the smallest active disk,
1322 * otherwise fallback to the default
1323 */
1324 static __u32 imsm_min_reserved_sectors(struct intel_super *super)
1325 {
1326 struct extent *e;
1327 int i;
1328 unsigned long long min_active;
1329 __u32 remainder;
1330 __u32 rv = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
1331 struct dl *dl, *dl_min = NULL;
1332
1333 if (!super)
1334 return rv;
1335
1336 min_active = 0;
1337 for (dl = super->disks; dl; dl = dl->next) {
1338 if (dl->index < 0)
1339 continue;
1340 unsigned long long blocks = total_blocks(&dl->disk);
1341 if (blocks < min_active || min_active == 0) {
1342 dl_min = dl;
1343 min_active = blocks;
1344 }
1345 }
1346 if (!dl_min)
1347 return rv;
1348
1349 /* find last lba used by subarrays on the smallest active disk */
1350 e = get_extents(super, dl_min);
1351 if (!e)
1352 return rv;
1353 for (i = 0; e[i].size; i++)
1354 continue;
1355
1356 remainder = min_active - e[i].start;
1357 free(e);
1358
1359 /* to give priority to recovery we should not require full
1360 IMSM_RESERVED_SECTORS from the spare */
1361 rv = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION;
1362
1363 /* if real reservation is smaller use that value */
1364 return (remainder < rv) ? remainder : rv;
1365 }
1366
1367 /* Return minimum size of a spare that can be used in this array*/
1368 static unsigned long long min_acceptable_spare_size_imsm(struct supertype *st)
1369 {
1370 struct intel_super *super = st->sb;
1371 struct dl *dl;
1372 struct extent *e;
1373 int i;
1374 unsigned long long rv = 0;
1375
1376 if (!super)
1377 return rv;
1378 /* find first active disk in array */
1379 dl = super->disks;
1380 while (dl && (is_failed(&dl->disk) || dl->index == -1))
1381 dl = dl->next;
1382 if (!dl)
1383 return rv;
1384 /* find last lba used by subarrays */
1385 e = get_extents(super, dl);
1386 if (!e)
1387 return rv;
1388 for (i = 0; e[i].size; i++)
1389 continue;
1390 if (i > 0)
1391 rv = e[i-1].start + e[i-1].size;
1392 free(e);
1393
1394 /* add the amount of space needed for metadata */
1395 rv = rv + imsm_min_reserved_sectors(super);
1396
1397 return rv * 512;
1398 }
1399
1400 static int is_gen_migration(struct imsm_dev *dev);
1401
1402 #define IMSM_4K_DIV 8
1403
1404 #ifndef MDASSEMBLE
1405 static __u64 blocks_per_migr_unit(struct intel_super *super,
1406 struct imsm_dev *dev);
1407
/* Pretty-print one raid volume for --examine.
 * @super:    container metadata (used for migration-unit math)
 * @dev:      the volume to dump
 * @uuid:     pre-formatted UUID string to display
 * @disk_idx: index of the disk the metadata was read from; used to
 *            report which slot "this" disk occupies
 */
static void print_imsm_dev(struct intel_super *super,
			   struct imsm_dev *dev,
			   char *uuid,
			   int disk_idx)
{
	__u64 sz;
	int slot, i;
	struct imsm_map *map = get_imsm_map(dev, MAP_0);
	/* map2: the second map; only printed when present — presumably
	 * NULL unless a migration is in progress (TODO confirm) */
	struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
	__u32 ord;

	printf("\n");
	printf("[%.16s]:\n", dev->volume);
	printf(" UUID : %s\n", uuid);
	printf(" RAID Level : %d", get_imsm_raid_level(map));
	if (map2)
		printf(" <-- %d", get_imsm_raid_level(map2));
	printf("\n");
	printf(" Members : %d", map->num_members);
	if (map2)
		printf(" <-- %d", map2->num_members);
	printf("\n");
	/* "U" = in sync, "_" = under rebuild (IMSM_ORD_REBUILD flag) */
	printf(" Slots : [");
	for (i = 0; i < map->num_members; i++) {
		ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
		printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
	}
	printf("]");
	if (map2) {
		printf(" <-- [");
		for (i = 0; i < map2->num_members; i++) {
			ord = get_imsm_ord_tbl_ent(dev, i, MAP_1);
			printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
		}
		printf("]");
	}
	printf("\n");
	printf(" Failed disk : ");
	if (map->failed_disk_num == 0xff)
		printf("none");
	else
		printf("%i", map->failed_disk_num);
	printf("\n");
	slot = get_imsm_disk_slot(map, disk_idx);
	if (slot >= 0) {
		ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
		printf(" This Slot : %d%s\n", slot,
		       ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
	} else
		printf(" This Slot : ?\n");
	/* volume size is stored as split 32-bit lo/hi fields */
	sz = __le32_to_cpu(dev->size_high);
	sz <<= 32;
	sz += __le32_to_cpu(dev->size_low);
	printf(" Array Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	sz = blocks_per_member(map);
	printf(" Per Dev Size : %llu%s\n", (unsigned long long)sz,
	       human_size(sz * 512));
	printf(" Sector Offset : %llu\n",
		pba_of_lba0(map));
	printf(" Num Stripes : %llu\n",
		num_data_stripes(map));
	/* blocks_per_strip is in 512-byte sectors; /2 yields KiB */
	printf(" Chunk Size : %u KiB",
		__le16_to_cpu(map->blocks_per_strip) / 2);
	if (map2)
		printf(" <-- %u KiB",
			__le16_to_cpu(map2->blocks_per_strip) / 2);
	printf("\n");
	printf(" Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
	printf(" Migrate State : ");
	if (dev->vol.migr_state) {
		if (migr_type(dev) == MIGR_INIT)
			printf("initialize\n");
		else if (migr_type(dev) == MIGR_REBUILD)
			printf("rebuild\n");
		else if (migr_type(dev) == MIGR_VERIFY)
			printf("check\n");
		else if (migr_type(dev) == MIGR_GEN_MIGR)
			printf("general migration\n");
		else if (migr_type(dev) == MIGR_STATE_CHANGE)
			printf("state change\n");
		else if (migr_type(dev) == MIGR_REPAIR)
			printf("repair\n");
		else
			printf("<unknown:%d>\n", migr_type(dev));
	} else
		printf("idle\n");
	printf(" Map State : %s", map_state_str[map->map_state]);
	if (dev->vol.migr_state) {
		/* shadows outer map on purpose: show the second map's state */
		struct imsm_map *map = get_imsm_map(dev, MAP_1);

		printf(" <-- %s", map_state_str[map->map_state]);
		printf("\n Checkpoint : %u ",
		       __le32_to_cpu(dev->vol.curr_migr_unit));
		/* checkpoint units are only meaningful on the first two
		 * disks during a general migration */
		if (is_gen_migration(dev) && (slot > 1 || slot < 0))
			printf("(N/A)");
		else
			printf("(%llu)", (unsigned long long)
				   blocks_per_migr_unit(super, dev));
	}
	printf("\n");
	printf(" Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
					"dirty" : "clean");
	printf(" RWH Policy : ");
	if (dev->rwh_policy == RWH_OFF)
		printf("off\n");
	else if (dev->rwh_policy == RWH_DISTRIBUTED)
		printf("PPL distributed\n");
	else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
		printf("PPL journaling drive\n");
	else
		printf("<unknown:%d>\n", dev->rwh_policy);
}
1521
/* Print one physical disk's metadata for --examine.
 * @index:       slot in the mpb disk table; -1 prints an unnumbered
 *               (spare) entry, values below -1 are rejected
 * @reserved:    sectors reserved for metadata, subtracted from the
 *               usable size
 * @sector_size: logical sector size used to scale the usable size
 */
static void print_imsm_disk(struct imsm_disk *disk,
			    int index,
			    __u32 reserved,
			    unsigned int sector_size) {
	char str[MAX_RAID_SERIAL_LEN + 1];
	__u64 sz;

	if (index < -1 || !disk)
		return;

	printf("\n");
	/* bound the copy: the on-disk serial may fill all bytes */
	snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
	if (index >= 0)
		printf(" Disk%02d Serial : %s\n", index, str);
	else
		printf(" Disk Serial : %s\n", str);
	printf(" State :%s%s%s%s\n", is_spare(disk) ? " spare" : "",
				     is_configured(disk) ? " active" : "",
				     is_failed(disk) ? " failed" : "",
				     is_journal(disk) ? " journal" : "");
	printf(" Id : %08x\n", __le32_to_cpu(disk->scsi_id));
	sz = total_blocks(disk) - reserved;
	printf(" Usable Size : %llu%s\n",
	       (unsigned long long)sz * 512 / sector_size,
	       human_size(sz * 512));
}
1548
/* Convert the in-memory migration record from 512-byte to 4K-sector
 * units by dividing every LBA/count field by IMSM_4K_DIV (8).
 * NOTE(review): the __u32 fields are divided in place without an
 * explicit le32 round-trip — presumably only run on little-endian
 * hosts; confirm for big-endian targets.
 */
void convert_to_4k_imsm_migr_rec(struct intel_super *super)
{
	struct migr_record *migr_rec = super->migr_rec;

	migr_rec->blocks_per_unit /= IMSM_4K_DIV;
	migr_rec->ckpt_area_pba /= IMSM_4K_DIV;
	migr_rec->dest_1st_member_lba /= IMSM_4K_DIV;
	migr_rec->dest_depth_per_unit /= IMSM_4K_DIV;
	/* the 64-bit post-migration capacity spans two 32-bit fields */
	split_ull((join_u32(migr_rec->post_migr_vol_cap,
		  migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV),
		  &migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi);
}
1561
/* Rescale a disk's total size from 512-byte to 4K-sector units. */
void convert_to_4k_imsm_disk(struct imsm_disk *disk)
{
	set_total_blocks(disk, (total_blocks(disk)/IMSM_4K_DIV));
}
1566
/* Rescale the whole anchor (disks, volumes, maps, BBM log) from
 * 512-byte to 4K-sector units and refresh the checksum.  Inverse of
 * convert_from_4k().
 */
void convert_to_4k(struct intel_super *super)
{
	struct imsm_super *mpb = super->anchor;
	struct imsm_disk *disk;
	int i;
	__u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);

	for (i = 0; i < mpb->num_disks ; i++) {
		disk = __get_imsm_disk(mpb, i);
		/* disk */
		convert_to_4k_imsm_disk(disk);
	}
	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
		struct imsm_map *map = get_imsm_map(dev, MAP_0);
		/* dev */
		split_ull((join_u32(dev->size_low, dev->size_high)/IMSM_4K_DIV),
				 &dev->size_low, &dev->size_high);
		dev->vol.curr_migr_unit /= IMSM_4K_DIV;

		/* map0 */
		set_blocks_per_member(map, blocks_per_member(map)/IMSM_4K_DIV);
		map->blocks_per_strip /= IMSM_4K_DIV;
		set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);

		if (dev->vol.migr_state) {
			/* map1 */
			map = get_imsm_map(dev, MAP_1);
			set_blocks_per_member(map,
			    blocks_per_member(map)/IMSM_4K_DIV);
			map->blocks_per_strip /= IMSM_4K_DIV;
			set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);
		}
	}
	if (bbm_log_size) {
		/* the BBM log sits at the end of the extended mpb */
		struct bbm_log *log = (void *)mpb +
			__le32_to_cpu(mpb->mpb_size) - bbm_log_size;
		__u32 i;

		for (i = 0; i < log->entry_count; i++) {
			struct bbm_log_entry *entry =
				&log->marked_block_entries[i];

			/* marked_count is stored as (blocks - 1) */
			__u8 count = entry->marked_count + 1;
			unsigned long long sector =
				__le48_to_cpu(&entry->defective_block_start);

			entry->defective_block_start =
				__cpu_to_le48(sector/IMSM_4K_DIV);
			/* never let an entry shrink to zero blocks */
			entry->marked_count = max(count/IMSM_4K_DIV, 1) - 1;
		}
	}

	mpb->check_sum = __gen_imsm_checksum(mpb);
}
1622
/* Print the migration record for --examine.  Only the first volume
 * found to be under general migration is reported, and only when the
 * examined disk occupies slot 0 or 1 (the record is kept on those).
 */
void examine_migr_rec_imsm(struct intel_super *super)
{
	struct migr_record *migr_rec = super->migr_rec;
	struct imsm_super *mpb = super->anchor;
	int i;

	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
		struct imsm_map *map;
		int slot = -1;

		if (is_gen_migration(dev) == 0)
			continue;

		printf("\nMigration Record Information:");

		/* first map under migration */
		map = get_imsm_map(dev, MAP_0);
		if (map)
			slot = get_imsm_disk_slot(map, super->disks->index);
		if (map == NULL || slot > 1 || slot < 0) {
			printf(" Empty\n ");
			printf("Examine one of first two disks in array\n");
			break;
		}
		printf("\n Status : ");
		if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
			printf("Normal\n");
		else
			printf("Contains Data\n");
		printf(" Current Unit : %u\n",
		       __le32_to_cpu(migr_rec->curr_migr_unit));
		printf(" Family : %u\n",
		       __le32_to_cpu(migr_rec->family_num));
		printf(" Ascending : %u\n",
		       __le32_to_cpu(migr_rec->ascending_migr));
		printf(" Blocks Per Unit : %u\n",
		       __le32_to_cpu(migr_rec->blocks_per_unit));
		printf(" Dest. Depth Per Unit : %u\n",
		       __le32_to_cpu(migr_rec->dest_depth_per_unit));
		printf(" Checkpoint Area pba : %u\n",
		       __le32_to_cpu(migr_rec->ckpt_area_pba));
		printf(" First member lba : %u\n",
		       __le32_to_cpu(migr_rec->dest_1st_member_lba));
		printf(" Total Number of Units : %u\n",
		       __le32_to_cpu(migr_rec->num_migr_units));
		printf(" Size of volume : %u\n",
		       __le32_to_cpu(migr_rec->post_migr_vol_cap));
		printf(" Expansion space for LBA64 : %u\n",
		       __le32_to_cpu(migr_rec->post_migr_vol_cap_hi));
		printf(" Record was read from : %u\n",
		       __le32_to_cpu(migr_rec->ckpt_read_disk_num));

		/* only one migration record exists per container */
		break;
	}
}
1679 #endif /* MDASSEMBLE */
1680
/* Convert the in-memory migration record from 4K-sector to 512-byte
 * units (multiply every LBA/count field by IMSM_4K_DIV).  Inverse of
 * convert_to_4k_imsm_migr_rec().
 * NOTE(review): fields are multiplied in place without an explicit
 * le32 round-trip — presumably little-endian only; confirm for
 * big-endian targets.
 */
void convert_from_4k_imsm_migr_rec(struct intel_super *super)
{
	struct migr_record *migr_rec = super->migr_rec;

	migr_rec->blocks_per_unit *= IMSM_4K_DIV;
	migr_rec->ckpt_area_pba *= IMSM_4K_DIV;
	migr_rec->dest_1st_member_lba *= IMSM_4K_DIV;
	migr_rec->dest_depth_per_unit *= IMSM_4K_DIV;
	/* the 64-bit post-migration capacity spans two 32-bit fields */
	split_ull((join_u32(migr_rec->post_migr_vol_cap,
		  migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV),
		  &migr_rec->post_migr_vol_cap,
		  &migr_rec->post_migr_vol_cap_hi);
}
1694
/* Rescale the whole anchor (disks, volumes, maps, BBM log) from
 * 4K-sector to 512-byte units and refresh the checksum.  Inverse of
 * convert_to_4k().
 */
void convert_from_4k(struct intel_super *super)
{
	struct imsm_super *mpb = super->anchor;
	struct imsm_disk *disk;
	int i;
	__u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);

	for (i = 0; i < mpb->num_disks ; i++) {
		disk = __get_imsm_disk(mpb, i);
		/* disk */
		set_total_blocks(disk, (total_blocks(disk)*IMSM_4K_DIV));
	}

	for (i = 0; i < mpb->num_raid_devs; i++) {
		struct imsm_dev *dev = __get_imsm_dev(mpb, i);
		struct imsm_map *map = get_imsm_map(dev, MAP_0);
		/* dev */
		split_ull((join_u32(dev->size_low, dev->size_high)*IMSM_4K_DIV),
				 &dev->size_low, &dev->size_high);
		dev->vol.curr_migr_unit *= IMSM_4K_DIV;

		/* map0 */
		set_blocks_per_member(map, blocks_per_member(map)*IMSM_4K_DIV);
		map->blocks_per_strip *= IMSM_4K_DIV;
		set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);

		if (dev->vol.migr_state) {
			/* map1 */
			map = get_imsm_map(dev, MAP_1);
			set_blocks_per_member(map,
			    blocks_per_member(map)*IMSM_4K_DIV);
			map->blocks_per_strip *= IMSM_4K_DIV;
			set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);
		}
	}
	if (bbm_log_size) {
		/* the BBM log sits at the end of the extended mpb */
		struct bbm_log *log = (void *)mpb +
			__le32_to_cpu(mpb->mpb_size) - bbm_log_size;
		__u32 i;

		for (i = 0; i < log->entry_count; i++) {
			struct bbm_log_entry *entry =
				&log->marked_block_entries[i];

			/* marked_count is stored as (blocks - 1) */
			__u8 count = entry->marked_count + 1;
			unsigned long long sector =
				__le48_to_cpu(&entry->defective_block_start);

			entry->defective_block_start =
				__cpu_to_le48(sector*IMSM_4K_DIV);
			entry->marked_count = count*IMSM_4K_DIV - 1;
		}
	}

	mpb->check_sum = __gen_imsm_checksum(mpb);
}
1751
1752 /*******************************************************************************
1753 * function: imsm_check_attributes
1754 * Description: Function checks if features represented by attributes flags
1755 * are supported by mdadm.
1756 * Parameters:
1757 * attributes - Attributes read from metadata
1758 * Returns:
1759 * 0 - passed attributes contains unsupported features flags
1760 * 1 - all features are supported
1761 ******************************************************************************/
1762 static int imsm_check_attributes(__u32 attributes)
1763 {
1764 int ret_val = 1;
1765 __u32 not_supported = MPB_ATTRIB_SUPPORTED^0xffffffff;
1766
1767 not_supported &= ~MPB_ATTRIB_IGNORED;
1768
1769 not_supported &= attributes;
1770 if (not_supported) {
1771 pr_err("(IMSM): Unsupported attributes : %x\n",
1772 (unsigned)__le32_to_cpu(not_supported));
1773 if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
1774 dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY \n");
1775 not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
1776 }
1777 if (not_supported & MPB_ATTRIB_2TB) {
1778 dprintf("\t\tMPB_ATTRIB_2TB\n");
1779 not_supported ^= MPB_ATTRIB_2TB;
1780 }
1781 if (not_supported & MPB_ATTRIB_RAID0) {
1782 dprintf("\t\tMPB_ATTRIB_RAID0\n");
1783 not_supported ^= MPB_ATTRIB_RAID0;
1784 }
1785 if (not_supported & MPB_ATTRIB_RAID1) {
1786 dprintf("\t\tMPB_ATTRIB_RAID1\n");
1787 not_supported ^= MPB_ATTRIB_RAID1;
1788 }
1789 if (not_supported & MPB_ATTRIB_RAID10) {
1790 dprintf("\t\tMPB_ATTRIB_RAID10\n");
1791 not_supported ^= MPB_ATTRIB_RAID10;
1792 }
1793 if (not_supported & MPB_ATTRIB_RAID1E) {
1794 dprintf("\t\tMPB_ATTRIB_RAID1E\n");
1795 not_supported ^= MPB_ATTRIB_RAID1E;
1796 }
1797 if (not_supported & MPB_ATTRIB_RAID5) {
1798 dprintf("\t\tMPB_ATTRIB_RAID5\n");
1799 not_supported ^= MPB_ATTRIB_RAID5;
1800 }
1801 if (not_supported & MPB_ATTRIB_RAIDCNG) {
1802 dprintf("\t\tMPB_ATTRIB_RAIDCNG\n");
1803 not_supported ^= MPB_ATTRIB_RAIDCNG;
1804 }
1805 if (not_supported & MPB_ATTRIB_BBM) {
1806 dprintf("\t\tMPB_ATTRIB_BBM\n");
1807 not_supported ^= MPB_ATTRIB_BBM;
1808 }
1809 if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
1810 dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY (== MPB_ATTRIB_LEGACY)\n");
1811 not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
1812 }
1813 if (not_supported & MPB_ATTRIB_EXP_STRIPE_SIZE) {
1814 dprintf("\t\tMPB_ATTRIB_EXP_STRIP_SIZE\n");
1815 not_supported ^= MPB_ATTRIB_EXP_STRIPE_SIZE;
1816 }
1817 if (not_supported & MPB_ATTRIB_2TB_DISK) {
1818 dprintf("\t\tMPB_ATTRIB_2TB_DISK\n");
1819 not_supported ^= MPB_ATTRIB_2TB_DISK;
1820 }
1821 if (not_supported & MPB_ATTRIB_NEVER_USE2) {
1822 dprintf("\t\tMPB_ATTRIB_NEVER_USE2\n");
1823 not_supported ^= MPB_ATTRIB_NEVER_USE2;
1824 }
1825 if (not_supported & MPB_ATTRIB_NEVER_USE) {
1826 dprintf("\t\tMPB_ATTRIB_NEVER_USE\n");
1827 not_supported ^= MPB_ATTRIB_NEVER_USE;
1828 }
1829
1830 if (not_supported)
1831 dprintf("(IMSM): Unknown attributes : %x\n", not_supported);
1832
1833 ret_val = 0;
1834 }
1835
1836 return ret_val;
1837 }
1838
1839 #ifndef MDASSEMBLE
1840 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
1841
1842 static void examine_super_imsm(struct supertype *st, char *homehost)
1843 {
1844 struct intel_super *super = st->sb;
1845 struct imsm_super *mpb = super->anchor;
1846 char str[MAX_SIGNATURE_LENGTH];
1847 int i;
1848 struct mdinfo info;
1849 char nbuf[64];
1850 __u32 sum;
1851 __u32 reserved = imsm_reserved_sectors(super, super->disks);
1852 struct dl *dl;
1853
1854 strncpy(str, (char *)mpb->sig, MPB_SIG_LEN);
1855 str[MPB_SIG_LEN-1] = '\0';
1856 printf(" Magic : %s\n", str);
1857 snprintf(str, strlen(MPB_VERSION_RAID0), "%s", get_imsm_version(mpb));
1858 printf(" Version : %s\n", get_imsm_version(mpb));
1859 printf(" Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
1860 printf(" Family : %08x\n", __le32_to_cpu(mpb->family_num));
1861 printf(" Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
1862 printf(" Attributes : ");
1863 if (imsm_check_attributes(mpb->attributes))
1864 printf("All supported\n");
1865 else
1866 printf("not supported\n");
1867 getinfo_super_imsm(st, &info, NULL);
1868 fname_from_uuid(st, &info, nbuf, ':');
1869 printf(" UUID : %s\n", nbuf + 5);
1870 sum = __le32_to_cpu(mpb->check_sum);
1871 printf(" Checksum : %08x %s\n", sum,
1872 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
1873 printf(" MPB Sectors : %d\n", mpb_sectors(mpb, super->sector_size));
1874 printf(" Disks : %d\n", mpb->num_disks);
1875 printf(" RAID Devices : %d\n", mpb->num_raid_devs);
1876 print_imsm_disk(__get_imsm_disk(mpb, super->disks->index),
1877 super->disks->index, reserved, super->sector_size);
1878 if (get_imsm_bbm_log_size(super->bbm_log)) {
1879 struct bbm_log *log = super->bbm_log;
1880
1881 printf("\n");
1882 printf("Bad Block Management Log:\n");
1883 printf(" Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
1884 printf(" Signature : %x\n", __le32_to_cpu(log->signature));
1885 printf(" Entry Count : %d\n", __le32_to_cpu(log->entry_count));
1886 }
1887 for (i = 0; i < mpb->num_raid_devs; i++) {
1888 struct mdinfo info;
1889 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1890
1891 super->current_vol = i;
1892 getinfo_super_imsm(st, &info, NULL);
1893 fname_from_uuid(st, &info, nbuf, ':');
1894 print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
1895 }
1896 for (i = 0; i < mpb->num_disks; i++) {
1897 if (i == super->disks->index)
1898 continue;
1899 print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved,
1900 super->sector_size);
1901 }
1902
1903 for (dl = super->disks; dl; dl = dl->next)
1904 if (dl->index == -1)
1905 print_imsm_disk(&dl->disk, -1, reserved,
1906 super->sector_size);
1907
1908 examine_migr_rec_imsm(super);
1909 }
1910
1911 static void brief_examine_super_imsm(struct supertype *st, int verbose)
1912 {
1913 /* We just write a generic IMSM ARRAY entry */
1914 struct mdinfo info;
1915 char nbuf[64];
1916 struct intel_super *super = st->sb;
1917
1918 if (!super->anchor->num_raid_devs) {
1919 printf("ARRAY metadata=imsm\n");
1920 return;
1921 }
1922
1923 getinfo_super_imsm(st, &info, NULL);
1924 fname_from_uuid(st, &info, nbuf, ':');
1925 printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
1926 }
1927
1928 static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
1929 {
1930 /* We just write a generic IMSM ARRAY entry */
1931 struct mdinfo info;
1932 char nbuf[64];
1933 char nbuf1[64];
1934 struct intel_super *super = st->sb;
1935 int i;
1936
1937 if (!super->anchor->num_raid_devs)
1938 return;
1939
1940 getinfo_super_imsm(st, &info, NULL);
1941 fname_from_uuid(st, &info, nbuf, ':');
1942 for (i = 0; i < super->anchor->num_raid_devs; i++) {
1943 struct imsm_dev *dev = get_imsm_dev(super, i);
1944
1945 super->current_vol = i;
1946 getinfo_super_imsm(st, &info, NULL);
1947 fname_from_uuid(st, &info, nbuf1, ':');
1948 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
1949 dev->volume, nbuf + 5, i, nbuf1 + 5);
1950 }
1951 }
1952
1953 static void export_examine_super_imsm(struct supertype *st)
1954 {
1955 struct intel_super *super = st->sb;
1956 struct imsm_super *mpb = super->anchor;
1957 struct mdinfo info;
1958 char nbuf[64];
1959
1960 getinfo_super_imsm(st, &info, NULL);
1961 fname_from_uuid(st, &info, nbuf, ':');
1962 printf("MD_METADATA=imsm\n");
1963 printf("MD_LEVEL=container\n");
1964 printf("MD_UUID=%s\n", nbuf+5);
1965 printf("MD_DEVICES=%u\n", mpb->num_disks);
1966 }
1967
/* Copy the IMSM metadata region (extended mpb + anchor sector +
 * trailing migration-record sector) from fd @from to fd @to.
 * Returns 0 on success, 1 on any failure.
 */
static int copy_metadata_imsm(struct supertype *st, int from, int to)
{
	/* The second last sector of the device contains
	 * the "struct imsm_super" metadata.
	 * This contains mpb_size which is the size in bytes of the
	 * extended metadata. This is located immediately before
	 * the imsm_super.
	 * We want to read all that, plus the last sector which
	 * may contain a migration record, and write it all
	 * to the target.
	 */
	void *buf;
	unsigned long long dsize, offset;
	int sectors;
	struct imsm_super *sb;
	struct intel_super *super = st->sb;
	unsigned int sector_size = super->sector_size;
	unsigned int written = 0;

	if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0)
		return 1;

	if (!get_dev_size(from, NULL, &dsize))
		goto err;

	/* read the anchor sector and validate the signature */
	if (lseek64(from, dsize-(2*sector_size), 0) < 0)
		goto err;
	if ((unsigned int)read(from, buf, sector_size) != sector_size)
		goto err;
	sb = buf;
	if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0)
		goto err;

	/* +2: the anchor sector itself and the migration-record sector */
	sectors = mpb_sectors(sb, sector_size) + 2;
	offset = dsize - sectors * sector_size;
	if (lseek64(from, offset, 0) < 0 ||
	    lseek64(to, offset, 0) < 0)
		goto err;
	/* copy in chunks no larger than the aligned buffer */
	while (written < sectors * sector_size) {
		int n = sectors*sector_size - written;
		if (n > 4096)
			n = 4096;
		if (read(from, buf, n) != n)
			goto err;
		if (write(to, buf, n) != n)
			goto err;
		written += n;
	}
	free(buf);
	return 0;
err:
	free(buf);
	return 1;
}
2022
2023 static void detail_super_imsm(struct supertype *st, char *homehost)
2024 {
2025 struct mdinfo info;
2026 char nbuf[64];
2027
2028 getinfo_super_imsm(st, &info, NULL);
2029 fname_from_uuid(st, &info, nbuf, ':');
2030 printf("\n UUID : %s\n", nbuf + 5);
2031 }
2032
2033 static void brief_detail_super_imsm(struct supertype *st)
2034 {
2035 struct mdinfo info;
2036 char nbuf[64];
2037 getinfo_super_imsm(st, &info, NULL);
2038 fname_from_uuid(st, &info, nbuf, ':');
2039 printf(" UUID=%s", nbuf + 5);
2040 }
2041
2042 static int imsm_read_serial(int fd, char *devname, __u8 *serial);
2043 static void fd2devname(int fd, char *name);
2044
2045 static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
2046 {
2047 /* dump an unsorted list of devices attached to AHCI Intel storage
2048 * controller, as well as non-connected ports
2049 */
2050 int hba_len = strlen(hba_path) + 1;
2051 struct dirent *ent;
2052 DIR *dir;
2053 char *path = NULL;
2054 int err = 0;
2055 unsigned long port_mask = (1 << port_count) - 1;
2056
2057 if (port_count > (int)sizeof(port_mask) * 8) {
2058 if (verbose > 0)
2059 pr_err("port_count %d out of range\n", port_count);
2060 return 2;
2061 }
2062
2063 /* scroll through /sys/dev/block looking for devices attached to
2064 * this hba
2065 */
2066 dir = opendir("/sys/dev/block");
2067 if (!dir)
2068 return 1;
2069
2070 for (ent = readdir(dir); ent; ent = readdir(dir)) {
2071 int fd;
2072 char model[64];
2073 char vendor[64];
2074 char buf[1024];
2075 int major, minor;
2076 char *device;
2077 char *c;
2078 int port;
2079 int type;
2080
2081 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
2082 continue;
2083 path = devt_to_devpath(makedev(major, minor));
2084 if (!path)
2085 continue;
2086 if (!path_attached_to_hba(path, hba_path)) {
2087 free(path);
2088 path = NULL;
2089 continue;
2090 }
2091
2092 /* retrieve the scsi device type */
2093 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
2094 if (verbose > 0)
2095 pr_err("failed to allocate 'device'\n");
2096 err = 2;
2097 break;
2098 }
2099 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
2100 if (load_sys(device, buf, sizeof(buf)) != 0) {
2101 if (verbose > 0)
2102 pr_err("failed to read device type for %s\n",
2103 path);
2104 err = 2;
2105 free(device);
2106 break;
2107 }
2108 type = strtoul(buf, NULL, 10);
2109
2110 /* if it's not a disk print the vendor and model */
2111 if (!(type == 0 || type == 7 || type == 14)) {
2112 vendor[0] = '\0';
2113 model[0] = '\0';
2114 sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
2115 if (load_sys(device, buf, sizeof(buf)) == 0) {
2116 strncpy(vendor, buf, sizeof(vendor));
2117 vendor[sizeof(vendor) - 1] = '\0';
2118 c = (char *) &vendor[sizeof(vendor) - 1];
2119 while (isspace(*c) || *c == '\0')
2120 *c-- = '\0';
2121
2122 }
2123 sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
2124 if (load_sys(device, buf, sizeof(buf)) == 0) {
2125 strncpy(model, buf, sizeof(model));
2126 model[sizeof(model) - 1] = '\0';
2127 c = (char *) &model[sizeof(model) - 1];
2128 while (isspace(*c) || *c == '\0')
2129 *c-- = '\0';
2130 }
2131
2132 if (vendor[0] && model[0])
2133 sprintf(buf, "%.64s %.64s", vendor, model);
2134 else
2135 switch (type) { /* numbers from hald/linux/device.c */
2136 case 1: sprintf(buf, "tape"); break;
2137 case 2: sprintf(buf, "printer"); break;
2138 case 3: sprintf(buf, "processor"); break;
2139 case 4:
2140 case 5: sprintf(buf, "cdrom"); break;
2141 case 6: sprintf(buf, "scanner"); break;
2142 case 8: sprintf(buf, "media_changer"); break;
2143 case 9: sprintf(buf, "comm"); break;
2144 case 12: sprintf(buf, "raid"); break;
2145 default: sprintf(buf, "unknown");
2146 }
2147 } else
2148 buf[0] = '\0';
2149 free(device);
2150
2151 /* chop device path to 'host%d' and calculate the port number */
2152 c = strchr(&path[hba_len], '/');
2153 if (!c) {
2154 if (verbose > 0)
2155 pr_err("%s - invalid path name\n", path + hba_len);
2156 err = 2;
2157 break;
2158 }
2159 *c = '\0';
2160 if ((sscanf(&path[hba_len], "ata%d", &port) == 1) ||
2161 ((sscanf(&path[hba_len], "host%d", &port) == 1)))
2162 port -= host_base;
2163 else {
2164 if (verbose > 0) {
2165 *c = '/'; /* repair the full string */
2166 pr_err("failed to determine port number for %s\n",
2167 path);
2168 }
2169 err = 2;
2170 break;
2171 }
2172
2173 /* mark this port as used */
2174 port_mask &= ~(1 << port);
2175
2176 /* print out the device information */
2177 if (buf[0]) {
2178 printf(" Port%d : - non-disk device (%s) -\n", port, buf);
2179 continue;
2180 }
2181
2182 fd = dev_open(ent->d_name, O_RDONLY);
2183 if (fd < 0)
2184 printf(" Port%d : - disk info unavailable -\n", port);
2185 else {
2186 fd2devname(fd, buf);
2187 printf(" Port%d : %s", port, buf);
2188 if (imsm_read_serial(fd, NULL, (__u8 *) buf) == 0)
2189 printf(" (%.*s)\n", MAX_RAID_SERIAL_LEN, buf);
2190 else
2191 printf(" ()\n");
2192 close(fd);
2193 }
2194 free(path);
2195 path = NULL;
2196 }
2197 if (path)
2198 free(path);
2199 if (dir)
2200 closedir(dir);
2201 if (err == 0) {
2202 int i;
2203
2204 for (i = 0; i < port_count; i++)
2205 if (port_mask & (1 << i))
2206 printf(" Port%d : - no device attached -\n", i);
2207 }
2208
2209 return err;
2210 }
2211
2212 static int print_vmd_attached_devs(struct sys_dev *hba)
2213 {
2214 struct dirent *ent;
2215 DIR *dir;
2216 char path[292];
2217 char link[256];
2218 char *c, *rp;
2219
2220 if (hba->type != SYS_DEV_VMD)
2221 return 1;
2222
2223 /* scroll through /sys/dev/block looking for devices attached to
2224 * this hba
2225 */
2226 dir = opendir("/sys/bus/pci/drivers/nvme");
2227 if (!dir)
2228 return 1;
2229
2230 for (ent = readdir(dir); ent; ent = readdir(dir)) {
2231 int n;
2232
2233 /* is 'ent' a device? check that the 'subsystem' link exists and
2234 * that its target matches 'bus'
2235 */
2236 sprintf(path, "/sys/bus/pci/drivers/nvme/%s/subsystem",
2237 ent->d_name);
2238 n = readlink(path, link, sizeof(link));
2239 if (n < 0 || n >= (int)sizeof(link))
2240 continue;
2241 link[n] = '\0';
2242 c = strrchr(link, '/');
2243 if (!c)
2244 continue;
2245 if (strncmp("pci", c+1, strlen("pci")) != 0)
2246 continue;
2247
2248 sprintf(path, "/sys/bus/pci/drivers/nvme/%s", ent->d_name);
2249
2250 rp = realpath(path, NULL);
2251 if (!rp)
2252 continue;
2253
2254 if (path_attached_to_hba(rp, hba->path)) {
2255 printf(" NVMe under VMD : %s\n", rp);
2256 }
2257 free(rp);
2258 }
2259
2260 closedir(dir);
2261 return 0;
2262 }
2263
2264 static void print_found_intel_controllers(struct sys_dev *elem)
2265 {
2266 for (; elem; elem = elem->next) {
2267 pr_err("found Intel(R) ");
2268 if (elem->type == SYS_DEV_SATA)
2269 fprintf(stderr, "SATA ");
2270 else if (elem->type == SYS_DEV_SAS)
2271 fprintf(stderr, "SAS ");
2272 else if (elem->type == SYS_DEV_NVME)
2273 fprintf(stderr, "NVMe ");
2274
2275 if (elem->type == SYS_DEV_VMD)
2276 fprintf(stderr, "VMD domain");
2277 else
2278 fprintf(stderr, "RAID controller");
2279
2280 if (elem->pci_id)
2281 fprintf(stderr, " at %s", elem->pci_id);
2282 fprintf(stderr, ".\n");
2283 }
2284 fflush(stderr);
2285 }
2286
/* Scan hba_path for 'ata%d' / 'host%d' entries and compute the number of
 * ports on the controller.
 *
 * hba_path   : sysfs directory of the controller
 * port_count : out parameter, receives the port count (0 if none found)
 *
 * Returns the lowest host number found (the "host base"), or -1 if the
 * directory cannot be opened or contains no host entries.
 *
 * Fix: the old code updated *port_count incrementally against the current
 * host_base, so the result depended on readdir() ordering — if a high
 * host number was seen before a lower one, the count came out too small.
 * Track the minimum and maximum host numbers explicitly instead.
 */
static int ahci_get_port_count(const char *hba_path, int *port_count)
{
	struct dirent *ent;
	DIR *dir;
	int host_base = -1;
	int host_max = -1;

	*port_count = 0;
	if ((dir = opendir(hba_path)) == NULL)
		return -1;

	for (ent = readdir(dir); ent; ent = readdir(dir)) {
		int host;

		if ((sscanf(ent->d_name, "ata%d", &host) != 1) &&
		    ((sscanf(ent->d_name, "host%d", &host) != 1)))
			continue;
		if (host_base == -1 || host < host_base)
			host_base = host;
		if (host > host_max)
			host_max = host;
	}
	closedir(dir);

	if (host_base != -1)
		*port_count = host_max - host_base + 1;
	return host_base;
}
2314
2315 static void print_imsm_capability(const struct imsm_orom *orom)
2316 {
2317 printf(" Platform : Intel(R) ");
2318 if (orom->capabilities == 0 && orom->driver_features == 0)
2319 printf("Matrix Storage Manager\n");
2320 else
2321 printf("Rapid Storage Technology%s\n",
2322 imsm_orom_is_enterprise(orom) ? " enterprise" : "");
2323 if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build)
2324 printf(" Version : %d.%d.%d.%d\n", orom->major_ver,
2325 orom->minor_ver, orom->hotfix_ver, orom->build);
2326 printf(" RAID Levels :%s%s%s%s%s\n",
2327 imsm_orom_has_raid0(orom) ? " raid0" : "",
2328 imsm_orom_has_raid1(orom) ? " raid1" : "",
2329 imsm_orom_has_raid1e(orom) ? " raid1e" : "",
2330 imsm_orom_has_raid10(orom) ? " raid10" : "",
2331 imsm_orom_has_raid5(orom) ? " raid5" : "");
2332 printf(" Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2333 imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
2334 imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
2335 imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
2336 imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
2337 imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
2338 imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
2339 imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
2340 imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
2341 imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
2342 imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
2343 imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
2344 imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
2345 imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
2346 imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
2347 imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
2348 imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
2349 printf(" 2TB volumes :%s supported\n",
2350 (orom->attr & IMSM_OROM_ATTR_2TB)?"":" not");
2351 printf(" 2TB disks :%s supported\n",
2352 (orom->attr & IMSM_OROM_ATTR_2TB_DISK)?"":" not");
2353 printf(" Max Disks : %d\n", orom->tds);
2354 printf(" Max Volumes : %d per array, %d per %s\n",
2355 orom->vpa, orom->vphba,
2356 imsm_orom_is_nvme(orom) ? "platform" : "controller");
2357 return;
2358 }
2359
2360 static void print_imsm_capability_export(const struct imsm_orom *orom)
2361 {
2362 printf("MD_FIRMWARE_TYPE=imsm\n");
2363 if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build)
2364 printf("IMSM_VERSION=%d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
2365 orom->hotfix_ver, orom->build);
2366 printf("IMSM_SUPPORTED_RAID_LEVELS=%s%s%s%s%s\n",
2367 imsm_orom_has_raid0(orom) ? "raid0 " : "",
2368 imsm_orom_has_raid1(orom) ? "raid1 " : "",
2369 imsm_orom_has_raid1e(orom) ? "raid1e " : "",
2370 imsm_orom_has_raid5(orom) ? "raid10 " : "",
2371 imsm_orom_has_raid10(orom) ? "raid5 " : "");
2372 printf("IMSM_SUPPORTED_CHUNK_SIZES=%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2373 imsm_orom_has_chunk(orom, 2) ? "2k " : "",
2374 imsm_orom_has_chunk(orom, 4) ? "4k " : "",
2375 imsm_orom_has_chunk(orom, 8) ? "8k " : "",
2376 imsm_orom_has_chunk(orom, 16) ? "16k " : "",
2377 imsm_orom_has_chunk(orom, 32) ? "32k " : "",
2378 imsm_orom_has_chunk(orom, 64) ? "64k " : "",
2379 imsm_orom_has_chunk(orom, 128) ? "128k " : "",
2380 imsm_orom_has_chunk(orom, 256) ? "256k " : "",
2381 imsm_orom_has_chunk(orom, 512) ? "512k " : "",
2382 imsm_orom_has_chunk(orom, 1024*1) ? "1M " : "",
2383 imsm_orom_has_chunk(orom, 1024*2) ? "2M " : "",
2384 imsm_orom_has_chunk(orom, 1024*4) ? "4M " : "",
2385 imsm_orom_has_chunk(orom, 1024*8) ? "8M " : "",
2386 imsm_orom_has_chunk(orom, 1024*16) ? "16M " : "",
2387 imsm_orom_has_chunk(orom, 1024*32) ? "32M " : "",
2388 imsm_orom_has_chunk(orom, 1024*64) ? "64M " : "");
2389 printf("IMSM_2TB_VOLUMES=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB) ? "yes" : "no");
2390 printf("IMSM_2TB_DISKS=%s\n",(orom->attr & IMSM_OROM_ATTR_2TB_DISK) ? "yes" : "no");
2391 printf("IMSM_MAX_DISKS=%d\n",orom->tds);
2392 printf("IMSM_MAX_VOLUMES_PER_ARRAY=%d\n",orom->vpa);
2393 printf("IMSM_MAX_VOLUMES_PER_CONTROLLER=%d\n",orom->vphba);
2394 }
2395
/* Top-level worker for 'mdadm --detail-platform' in human-readable mode.
 *
 * verbose         : >0 enables error reporting
 * enumerate_only  : when set, only probe whether any IMSM-capable
 *                   controller exists and return 0/2 without printing
 * controller_path : optional filter; restrict output to this controller
 *
 * Returns 0 on success, 1 if no matching controller was found, 2 (or
 * result|2) on probe/enumeration errors.
 */
static int detail_platform_imsm(int verbose, int enumerate_only, char *controller_path)
{
	/* There are two components to imsm platform support, the ahci SATA
	 * controller and the option-rom. To find the SATA controller we
	 * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
	 * controller with the Intel vendor id is present. This approach
	 * allows mdadm to leverage the kernel's ahci detection logic, with the
	 * caveat that if ahci.ko is not loaded mdadm will not be able to
	 * detect platform raid capabilities. The option-rom resides in a
	 * platform "Adapter ROM". We scan for its signature to retrieve the
	 * platform capabilities. If raid support is disabled in the BIOS the
	 * option-rom capability structure will not be available.
	 */
	struct sys_dev *list, *hba;
	int host_base = 0;
	int port_count = 0;
	int result=1;

	if (enumerate_only) {
		/* probe only: succeed as soon as one capable hba is found */
		if (check_env("IMSM_NO_PLATFORM"))
			return 0;
		list = find_intel_devices();
		if (!list)
			return 2;
		for (hba = list; hba; hba = hba->next) {
			if (find_imsm_capability(hba)) {
				result = 0;
				break;
			}
			else
				result = 2;
		}
		return result;
	}

	list = find_intel_devices();
	if (!list) {
		if (verbose > 0)
			pr_err("no active Intel(R) RAID controller found.\n");
		return 2;
	} else if (verbose > 0)
		print_found_intel_controllers(list);

	/* attach orom capabilities to each hba (also populates the global
	 * orom_entries list used below); result stays 1 if nothing matched
	 */
	for (hba = list; hba; hba = hba->next) {
		if (controller_path && (compare_paths(hba->path, controller_path) != 0))
			continue;
		if (!find_imsm_capability(hba)) {
			char buf[PATH_MAX];
			pr_err("imsm capabilities not found for controller: %s (type %s)\n",
				  hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path,
				  get_sys_dev_type(hba->type));
			continue;
		}
		result = 0;
	}

	if (controller_path && result == 1) {
		pr_err("no active Intel(R) RAID controller found under %s\n",
				controller_path);
		return result;
	}

	const struct orom_entry *entry;

	/* print one capability section per discovered option-rom */
	for (entry = orom_entries; entry; entry = entry->next) {
		if (entry->type == SYS_DEV_VMD) {
			/* VMD: list each domain and the NVMe devices below it */
			print_imsm_capability(&entry->orom);
			printf(" 3rd party NVMe :%s supported\n",
			    imsm_orom_has_tpv_support(&entry->orom)?"":" not");
			for (hba = list; hba; hba = hba->next) {
				if (hba->type == SYS_DEV_VMD) {
					char buf[PATH_MAX];
					printf(" I/O Controller : %s (%s)\n",
						vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type));
					if (print_vmd_attached_devs(hba)) {
						if (verbose > 0)
							pr_err("failed to get devices attached to VMD domain.\n");
						result |= 2;
					}
				}
			}
			printf("\n");
			continue;
		}

		print_imsm_capability(&entry->orom);
		if (entry->type == SYS_DEV_NVME) {
			/* NVMe: just list the device paths */
			for (hba = list; hba; hba = hba->next) {
				if (hba->type == SYS_DEV_NVME)
					printf(" NVMe Device : %s\n", hba->path);
			}
			printf("\n");
			continue;
		}

		/* SATA/SAS: resolve each pci device id back to its hba and,
		 * for AHCI, enumerate the attached ports
		 */
		struct devid_list *devid;
		for (devid = entry->devid_list; devid; devid = devid->next) {
			hba = device_by_id(devid->devid);
			if (!hba)
				continue;

			printf(" I/O Controller : %s (%s)\n",
				hba->path, get_sys_dev_type(hba->type));
			if (hba->type == SYS_DEV_SATA) {
				host_base = ahci_get_port_count(hba->path, &port_count);
				if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
					if (verbose > 0)
						pr_err("failed to enumerate ports on SATA controller at %s.\n", hba->pci_id);
					result |= 2;
				}
			}
		}
		printf("\n");
	}

	return result;
}
2513
2514 static int export_detail_platform_imsm(int verbose, char *controller_path)
2515 {
2516 struct sys_dev *list, *hba;
2517 int result=1;
2518
2519 list = find_intel_devices();
2520 if (!list) {
2521 if (verbose > 0)
2522 pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_INTEL_DEVICES\n");
2523 result = 2;
2524 return result;
2525 }
2526
2527 for (hba = list; hba; hba = hba->next) {
2528 if (controller_path && (compare_paths(hba->path,controller_path) != 0))
2529 continue;
2530 if (!find_imsm_capability(hba) && verbose > 0) {
2531 char buf[PATH_MAX];
2532 pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_IMSM_CAPABLE_DEVICE_UNDER_%s\n",
2533 hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path);
2534 }
2535 else
2536 result = 0;
2537 }
2538
2539 const struct orom_entry *entry;
2540
2541 for (entry = orom_entries; entry; entry = entry->next) {
2542 if (entry->type == SYS_DEV_VMD) {
2543 for (hba = list; hba; hba = hba->next)
2544 print_imsm_capability_export(&entry->orom);
2545 continue;
2546 }
2547 print_imsm_capability_export(&entry->orom);
2548 }
2549
2550 return result;
2551 }
2552
2553 #endif
2554
/* Decide whether an array belongs to this host.
 *
 * imsm metadata carries no host identification at all, so the answer is
 * always "unknown" (-1).  Membership is instead enforced by
 * compare_super()/'family_num' matching and by the arrays listed in
 * mdadm.conf; auto-assembly may therefore still pick up "foreign"
 * arrays.
 */
static int match_home_imsm(struct supertype *st, char *homehost)
{
	return -1;
}
2568
/* Synthesize a 16-byte uuid for the current volume (or the container
 * when no volume is selected) into uuid[4].
 */
static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
{
	/* The uuid returned here is used for:
	 *  uuid to put into bitmap file (Create, Grow)
	 *  uuid for backup header when saving critical section (Grow)
	 *  comparing uuids when re-adding a device into an array
	 *    In these cases the uuid required is that of the data-array,
	 *    not the device-set.
	 *  uuid to recognise same set when adding a missing device back
	 *    to an array.   This is a uuid for the device-set.
	 *
	 * For each of these we can make do with a truncated
	 * or hashed uuid rather than the original, as long as
	 * everyone agrees.
	 * In each case the uuid required is that of the data-array,
	 * not the device-set.
	 */
	/* imsm does not track uuid's so we synthesis one using sha1 on
	 * - The signature (Which is constant for all imsm array, but no matter)
	 * - the orig_family_num of the container
	 * - the index number of the volume
	 * - the 'serial' number of the volume.
	 * Hopefully these are all constant.
	 */
	struct intel_super *super = st->sb;

	char buf[20];	/* SHA1 digest is 20 bytes; only 16 are used below */
	struct sha1_ctx ctx;
	struct imsm_dev *dev = NULL;
	__u32 family_num;

	/* some mdadm versions failed to set ->orig_family_num, in which
	 * case fall back to ->family_num. orig_family_num will be
	 * fixed up with the first metadata update.
	 */
	family_num = super->anchor->orig_family_num;
	if (family_num == 0)
		family_num = super->anchor->family_num;
	sha1_init_ctx(&ctx);
	sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
	sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
	/* mix in the volume index and serial only when a specific volume
	 * is selected; otherwise the uuid identifies the container
	 */
	if (super->current_vol >= 0)
		dev = get_imsm_dev(super, super->current_vol);
	if (dev) {
		__u32 vol = super->current_vol;
		sha1_process_bytes(&vol, sizeof(vol), &ctx);
		sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
	}
	sha1_finish_ctx(&ctx, buf);
	/* truncate the 20-byte digest to the 16-byte md uuid */
	memcpy(uuid, buf, 4*4);
}
2620
#if 0
/* Parse the dotted version string embedded in the mpb signature into
 * numeric components.  Currently compiled out.
 * NOTE(review): 'major' is collected but never returned — *m receives
 * the minor number and *p the patch number; confirm intent before
 * reviving this code.
 */
static void
get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
{
	__u8 *v = get_imsm_version(mpb);
	__u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
	char major[] = { 0, 0, 0 };
	char minor[] = { 0 ,0, 0 };
	char patch[] = { 0, 0, 0 };
	char *ver_parse[] = { major, minor, patch };
	int i, j;

	/* split on '.', keeping at most two digits per component */
	i = j = 0;
	while (*v != '\0' && v < end) {
		if (*v != '.' && j < 2)
			ver_parse[i][j++] = *v;
		else {
			i++;
			j = 0;
		}
		v++;
	}

	*m = strtol(minor, NULL, 0);
	*p = strtol(patch, NULL, 0);
}
#endif
2648
2649 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
2650 {
2651 /* migr_strip_size when repairing or initializing parity */
2652 struct imsm_map *map = get_imsm_map(dev, MAP_0);
2653 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2654
2655 switch (get_imsm_raid_level(map)) {
2656 case 5:
2657 case 10:
2658 return chunk;
2659 default:
2660 return 128*1024 >> 9;
2661 }
2662 }
2663
2664 static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
2665 {
2666 /* migr_strip_size when rebuilding a degraded disk, no idea why
2667 * this is different than migr_strip_size_resync(), but it's good
2668 * to be compatible
2669 */
2670 struct imsm_map *map = get_imsm_map(dev, MAP_1);
2671 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2672
2673 switch (get_imsm_raid_level(map)) {
2674 case 1:
2675 case 10:
2676 if (map->num_members % map->num_domains == 0)
2677 return 128*1024 >> 9;
2678 else
2679 return chunk;
2680 case 5:
2681 return max((__u32) 64*1024 >> 9, chunk);
2682 default:
2683 return 128*1024 >> 9;
2684 }
2685 }
2686
2687 static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
2688 {
2689 struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2690 struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2691 __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
2692 __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
2693
2694 return max((__u32) 1, hi_chunk / lo_chunk);
2695 }
2696
2697 static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
2698 {
2699 struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2700 int level = get_imsm_raid_level(lo);
2701
2702 if (level == 1 || level == 10) {
2703 struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2704
2705 return hi->num_domains;
2706 } else
2707 return num_stripes_per_unit_resync(dev);
2708 }
2709
2710 static __u8 imsm_num_data_members(struct imsm_dev *dev, int second_map)
2711 {
2712 /* named 'imsm_' because raid0, raid1 and raid10
2713 * counter-intuitively have the same number of data disks
2714 */
2715 struct imsm_map *map = get_imsm_map(dev, second_map);
2716
2717 switch (get_imsm_raid_level(map)) {
2718 case 0:
2719 return map->num_members;
2720 break;
2721 case 1:
2722 case 10:
2723 return map->num_members/2;
2724 case 5:
2725 return map->num_members - 1;
2726 default:
2727 dprintf("unsupported raid level\n");
2728 return 0;
2729 }
2730 }
2731
2732 static __u32 parity_segment_depth(struct imsm_dev *dev)
2733 {
2734 struct imsm_map *map = get_imsm_map(dev, MAP_0);
2735 __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2736
2737 switch(get_imsm_raid_level(map)) {
2738 case 1:
2739 case 10:
2740 return chunk * map->num_domains;
2741 case 5:
2742 return chunk * map->num_members;
2743 default:
2744 return chunk;
2745 }
2746 }
2747
/* Translate a per-member block offset into a volume-relative offset
 * according to the MAP_1 layout of the device under migration.
 * Returns 0 for raid levels other than 1/10/5.
 */
static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
{
	struct imsm_map *map = get_imsm_map(dev, MAP_1);
	__u32 chunk = __le32_to_cpu(map->blocks_per_strip);
	__u32 strip = block / chunk;

	switch (get_imsm_raid_level(map)) {
	case 1:
	case 10: {
		/* scale the strip index by the mirror-domain count; the +1
		 * bias matches the existing on-disk accounting — confirm
		 * before changing
		 */
		__u32 vol_strip = (strip * map->num_domains) + 1;
		__u32 vol_stripe = vol_strip / map->num_members;

		return vol_stripe * chunk + block % chunk;
	} case 5: {
		/* one member of each stripe holds parity, not data */
		__u32 stripe = strip / (map->num_members - 1);

		return stripe * chunk + block % chunk;
	}
	default:
		return 0;
	}
}
2770
static __u64 blocks_per_migr_unit(struct intel_super *super,
				  struct imsm_dev *dev)
{
	/* calculate the conversion factor between per member 'blocks'
	 * (md/{resync,rebuild}_start) and imsm migration units, return
	 * 0 for the 'not migrating' and 'unsupported migration' cases
	 */
	if (!dev->vol.migr_state)
		return 0;

	switch (migr_type(dev)) {
	case MIGR_GEN_MIGR: {
		/* general migration stores its own per-unit block count in
		 * the on-disk migration record
		 */
		struct migr_record *migr_rec = super->migr_rec;
		return __le32_to_cpu(migr_rec->blocks_per_unit);
	}
	case MIGR_VERIFY:
	case MIGR_REPAIR:
	case MIGR_INIT: {
		struct imsm_map *map = get_imsm_map(dev, MAP_0);
		__u32 stripes_per_unit;
		__u32 blocks_per_unit;
		__u32 parity_depth;
		__u32 migr_chunk;
		__u32 block_map;
		__u32 block_rel;
		__u32 segment;
		__u32 stripe;
		__u8  disks;

		/* yes, this is really the translation of migr_units to
		 * per-member blocks in the 'resync' case
		 */
		stripes_per_unit = num_stripes_per_unit_resync(dev);
		migr_chunk = migr_strip_blocks_resync(dev);
		disks = imsm_num_data_members(dev, MAP_0);
		blocks_per_unit = stripes_per_unit * migr_chunk * disks;
		/* NOTE(review): blocks_per_strip is read with
		 * __le32_to_cpu elsewhere in this file; the __le16 here
		 * looks inconsistent — confirm the field width
		 */
		stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
		segment = blocks_per_unit / stripe;
		block_rel = blocks_per_unit - segment * stripe;
		parity_depth = parity_segment_depth(dev);
		block_map = map_migr_block(dev, block_rel);
		return block_map + parity_depth * segment;
	}
	case MIGR_REBUILD: {
		__u32 stripes_per_unit;
		__u32 migr_chunk;

		stripes_per_unit = num_stripes_per_unit_rebuild(dev);
		migr_chunk = migr_strip_blocks_rebuild(dev);
		return migr_chunk * stripes_per_unit;
	}
	case MIGR_STATE_CHANGE:
	default:
		return 0;
	}
}
2827
2828 static int imsm_level_to_layout(int level)
2829 {
2830 switch (level) {
2831 case 0:
2832 case 1:
2833 return 0;
2834 case 5:
2835 case 6:
2836 return ALGORITHM_LEFT_ASYMMETRIC;
2837 case 10:
2838 return 0x102;
2839 }
2840 return UnSet;
2841 }
2842
2843 /*******************************************************************************
2844 * Function: read_imsm_migr_rec
2845 * Description: Function reads imsm migration record from last sector of disk
2846 * Parameters:
2847 * fd : disk descriptor
2848 * super : metadata info
2849 * Returns:
2850 * 0 : success,
2851 * -1 : fail
2852 ******************************************************************************/
2853 static int read_imsm_migr_rec(int fd, struct intel_super *super)
2854 {
2855 int ret_val = -1;
2856 unsigned int sector_size = super->sector_size;
2857 unsigned long long dsize;
2858
2859 get_dev_size(fd, NULL, &dsize);
2860 if (lseek64(fd, dsize - (sector_size*MIGR_REC_SECTOR_POSITION),
2861 SEEK_SET) < 0) {
2862 pr_err("Cannot seek to anchor block: %s\n",
2863 strerror(errno));
2864 goto out;
2865 }
2866 if ((unsigned int)read(fd, super->migr_rec_buf,
2867 MIGR_REC_BUF_SECTORS*sector_size) !=
2868 MIGR_REC_BUF_SECTORS*sector_size) {
2869 pr_err("Cannot read migr record block: %s\n",
2870 strerror(errno));
2871 goto out;
2872 }
2873 ret_val = 0;
2874 if (sector_size == 4096)
2875 convert_from_4k_imsm_migr_rec(super);
2876
2877 out:
2878 return ret_val;
2879 }
2880
2881 static struct imsm_dev *imsm_get_device_during_migration(
2882 struct intel_super *super)
2883 {
2884
2885 struct intel_dev *dv;
2886
2887 for (dv = super->devlist; dv; dv = dv->next) {
2888 if (is_gen_migration(dv->dev))
2889 return dv->dev;
2890 }
2891 return NULL;
2892 }
2893
2894 /*******************************************************************************
2895 * Function: load_imsm_migr_rec
2896 * Description: Function reads imsm migration record (it is stored at the last
2897 * sector of disk)
2898 * Parameters:
2899 * super : imsm internal array info
2900 * info : general array info
2901 * Returns:
2902 * 0 : success
2903 * -1 : fail
2904 * -2 : no migration in progress
2905 ******************************************************************************/
2906 static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
2907 {
2908 struct mdinfo *sd;
2909 struct dl *dl;
2910 char nm[30];
2911 int retval = -1;
2912 int fd = -1;
2913 struct imsm_dev *dev;
2914 struct imsm_map *map;
2915 int slot = -1;
2916
2917 /* find map under migration */
2918 dev = imsm_get_device_during_migration(super);
2919 /* nothing to load,no migration in progress?
2920 */
2921 if (dev == NULL)
2922 return -2;
2923
2924 if (info) {
2925 for (sd = info->devs ; sd ; sd = sd->next) {
2926 /* read only from one of the first two slots */
2927 if ((sd->disk.raid_disk < 0) ||
2928 (sd->disk.raid_disk > 1))
2929 continue;
2930
2931 sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
2932 fd = dev_open(nm, O_RDONLY);
2933 if (fd >= 0)
2934 break;
2935 }
2936 }
2937 if (fd < 0) {
2938 map = get_imsm_map(dev, MAP_0);
2939 for (dl = super->disks; dl; dl = dl->next) {
2940 /* skip spare and failed disks
2941 */
2942 if (dl->index < 0)
2943 continue;
2944 /* read only from one of the first two slots */
2945 if (map)
2946 slot = get_imsm_disk_slot(map, dl->index);
2947 if (map == NULL || slot > 1 || slot < 0)
2948 continue;
2949 sprintf(nm, "%d:%d", dl->major, dl->minor);
2950 fd = dev_open(nm, O_RDONLY);
2951 if (fd >= 0)
2952 break;
2953 }
2954 }
2955 if (fd < 0)
2956 goto out;
2957 retval = read_imsm_migr_rec(fd, super);
2958
2959 out:
2960 if (fd >= 0)
2961 close(fd);
2962 return retval;
2963 }
2964
2965 #ifndef MDASSEMBLE
2966 /*******************************************************************************
2967 * function: imsm_create_metadata_checkpoint_update
2968 * Description: It creates update for checkpoint change.
2969 * Parameters:
2970 * super : imsm internal array info
2971 * u : pointer to prepared update
2972 * Returns:
2973 * Uptate length.
2974 * If length is equal to 0, input pointer u contains no update
2975 ******************************************************************************/
2976 static int imsm_create_metadata_checkpoint_update(
2977 struct intel_super *super,
2978 struct imsm_update_general_migration_checkpoint **u)
2979 {
2980
2981 int update_memory_size = 0;
2982
2983 dprintf("(enter)\n");
2984
2985 if (u == NULL)
2986 return 0;
2987 *u = NULL;
2988
2989 /* size of all update data without anchor */
2990 update_memory_size =
2991 sizeof(struct imsm_update_general_migration_checkpoint);
2992
2993 *u = xcalloc(1, update_memory_size);
2994 if (*u == NULL) {
2995 dprintf("error: cannot get memory\n");
2996 return 0;
2997 }
2998 (*u)->type = update_general_migration_checkpoint;
2999 (*u)->curr_migr_unit = __le32_to_cpu(super->migr_rec->curr_migr_unit);
3000 dprintf("prepared for %u\n", (*u)->curr_migr_unit);
3001
3002 return update_memory_size;
3003 }
3004
3005 static void imsm_update_metadata_locally(struct supertype *st,
3006 void *buf, int len);
3007
3008 /*******************************************************************************
3009 * Function: write_imsm_migr_rec
3010 * Description: Function writes imsm migration record
3011 * (at the last sector of disk)
3012 * Parameters:
3013 * super : imsm internal array info
3014 * Returns:
3015 * 0 : success
3016 * -1 : if fail
3017 ******************************************************************************/
3018 static int write_imsm_migr_rec(struct supertype *st)
3019 {
3020 struct intel_super *super = st->sb;
3021 unsigned int sector_size = super->sector_size;
3022 unsigned long long dsize;
3023 char nm[30];
3024 int fd = -1;
3025 int retval = -1;
3026 struct dl *sd;
3027 int len;
3028 struct imsm_update_general_migration_checkpoint *u;
3029 struct imsm_dev *dev;
3030 struct imsm_map *map;
3031
3032 /* find map under migration */
3033 dev = imsm_get_device_during_migration(super);
3034 /* if no migration, write buffer anyway to clear migr_record
3035 * on disk based on first available device
3036 */
3037 if (dev == NULL)
3038 dev = get_imsm_dev(super, super->current_vol < 0 ? 0 :
3039 super->current_vol);
3040
3041 map = get_imsm_map(dev, MAP_0);
3042
3043 if (sector_size == 4096)
3044 convert_to_4k_imsm_migr_rec(super);
3045 for (sd = super->disks ; sd ; sd = sd->next) {
3046 int slot = -1;
3047
3048 /* skip failed and spare devices */
3049 if (sd->index < 0)
3050 continue;
3051 /* write to 2 first slots only */
3052 if (map)
3053 slot = get_imsm_disk_slot(map, sd->index);
3054 if (map == NULL || slot > 1 || slot < 0)
3055 continue;
3056
3057 sprintf(nm, "%d:%d", sd->major, sd->minor);
3058 fd = dev_open(nm, O_RDWR);
3059 if (fd < 0)
3060 continue;
3061 get_dev_size(fd, NULL, &dsize);
3062 if (lseek64(fd, dsize - (MIGR_REC_SECTOR_POSITION*sector_size),
3063 SEEK_SET) < 0) {
3064 pr_err("Cannot seek to anchor block: %s\n",
3065 strerror(errno));
3066 goto out;
3067 }
3068 if ((unsigned int)write(fd, super->migr_rec_buf,
3069 MIGR_REC_BUF_SECTORS*sector_size) !=
3070 MIGR_REC_BUF_SECTORS*sector_size) {
3071 pr_err("Cannot write migr record block: %s\n",
3072 strerror(errno));
3073 goto out;
3074 }
3075 close(fd);
3076 fd = -1;
3077 }
3078 if (sector_size == 4096)
3079 convert_from_4k_imsm_migr_rec(super);
3080 /* update checkpoint information in metadata */
3081 len = imsm_create_metadata_checkpoint_update(super, &u);
3082 if (len <= 0) {
3083 dprintf("imsm: Cannot prepare update\n");
3084 goto out;
3085 }
3086 /* update metadata locally */
3087 imsm_update_metadata_locally(st, u, len);
3088 /* and possibly remotely */
3089 if (st->update_tail) {
3090 append_metadata_update(st, u, len);
3091 /* during reshape we do all work inside metadata handler
3092 * manage_reshape(), so metadata update has to be triggered
3093 * insida it
3094 */
3095 flush_metadata_updates(st);
3096 st->update_tail = &st->updates;
3097 } else
3098 free(u);
3099
3100 retval = 0;
3101 out:
3102 if (fd >= 0)
3103 close(fd);
3104 return retval;
3105 }
3106 #endif /* MDASSEMBLE */
3107
/* spare/missing disk activations are not allowed while an
 * array/container performs a reshape operation, because
 * all arrays in the container work on the same disk set
 */
3112 int imsm_reshape_blocks_arrays_changes(struct intel_super *super)
3113 {
3114 int rv = 0;
3115 struct intel_dev *i_dev;
3116 struct imsm_dev *dev;
3117
3118 /* check whole container
3119 */
3120 for (i_dev = super->devlist; i_dev; i_dev = i_dev->next) {
3121 dev = i_dev->dev;