/*
 * mdadm - Intel(R) Matrix Storage Manager Support
 *
 * Copyright (C) 2002-2008 Intel Corporation
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

#define HAVE_STDINT_H 1
#include "mdadm.h"
#include "mdmon.h"
#include "sha1.h"
#include "platform-intel.h"
#include <values.h>
#include <scsi/sg.h>
#include <ctype.h>
#include <dirent.h>

/* MPB == Metadata Parameter Block */
#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
#define MPB_SIG_LEN (strlen(MPB_SIGNATURE))
#define MPB_VERSION_RAID0 "1.0.00"
#define MPB_VERSION_RAID1 "1.1.00"
#define MPB_VERSION_MANY_VOLUMES_PER_ARRAY "1.2.00"
#define MPB_VERSION_3OR4_DISK_ARRAY "1.2.01"
#define MPB_VERSION_RAID5 "1.2.02"
#define MPB_VERSION_5OR6_DISK_ARRAY "1.2.04"
#define MPB_VERSION_CNG "1.2.06"
#define MPB_VERSION_ATTRIBS "1.3.00"
#define MAX_SIGNATURE_LENGTH  32
#define MAX_RAID_SERIAL_LEN   16

/* supports RAID0 */
#define MPB_ATTRIB_RAID0                __cpu_to_le32(0x00000001)
/* supports RAID1 */
#define MPB_ATTRIB_RAID1                __cpu_to_le32(0x00000002)
/* supports RAID10 */
#define MPB_ATTRIB_RAID10               __cpu_to_le32(0x00000004)
/* supports RAID1E */
#define MPB_ATTRIB_RAID1E               __cpu_to_le32(0x00000008)
/* supports RAID5 */
#define MPB_ATTRIB_RAID5                __cpu_to_le32(0x00000010)
/* supports RAID CNG */
#define MPB_ATTRIB_RAIDCNG              __cpu_to_le32(0x00000020)
/* supports expanded stripe sizes of 256K, 512K and 1MB */
#define MPB_ATTRIB_EXP_STRIPE_SIZE      __cpu_to_le32(0x00000040)

/* The OROM supports RST caching of volumes */
#define MPB_ATTRIB_NVM                  __cpu_to_le32(0x02000000)
/* The OROM supports creating disks greater than 2TB */
#define MPB_ATTRIB_2TB_DISK             __cpu_to_le32(0x04000000)
/* The OROM supports Bad Block Management */
#define MPB_ATTRIB_BBM                  __cpu_to_le32(0x08000000)

/* The OROM supports NVM caching of volumes */
#define MPB_ATTRIB_NEVER_USE2           __cpu_to_le32(0x10000000)
/* The OROM supports creating volumes greater than 2TB */
#define MPB_ATTRIB_2TB                  __cpu_to_le32(0x20000000)
/* originally for PMP, now unused. Never use this bit! */
#define MPB_ATTRIB_NEVER_USE            __cpu_to_le32(0x40000000)
/* Verify MPB contents against checksum after reading MPB */
#define MPB_ATTRIB_CHECKSUM_VERIFY      __cpu_to_le32(0x80000000)

/* Define all supported attributes that have to be accepted by mdadm */
#define MPB_ATTRIB_SUPPORTED           (MPB_ATTRIB_CHECKSUM_VERIFY | \
                                        MPB_ATTRIB_2TB             | \
                                        MPB_ATTRIB_2TB_DISK        | \
                                        MPB_ATTRIB_RAID0           | \
                                        MPB_ATTRIB_RAID1           | \
                                        MPB_ATTRIB_RAID10          | \
                                        MPB_ATTRIB_RAID5           | \
                                        MPB_ATTRIB_EXP_STRIPE_SIZE | \
                                        MPB_ATTRIB_BBM)

/* Define attributes that are unused but not harmful */
#define MPB_ATTRIB_IGNORED              (MPB_ATTRIB_NEVER_USE)

#define MPB_SECTOR_CNT 2210
#define IMSM_RESERVED_SECTORS 8192
#define NUM_BLOCKS_DIRTY_STRIPE_REGION 2048
#define SECT_PER_MB_SHIFT 11
#define MAX_SECTOR_SIZE 4096
#define MULTIPLE_PPL_AREA_SIZE_IMSM (1024 * 1024) /* Size of the whole
                                                   * multiple PPL area
                                                   */

/*
 * This macro lets us ensure that no-one accidentally
 * changes the size of a struct
 */
#define ASSERT_SIZE(_struct, size) \
static inline void __assert_size_##_struct(void)        \
{                                                       \
        switch (0) {                                    \
        case 0: break;                                  \
        case (sizeof(struct _struct) == size): break;   \
        }                                               \
}
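
/* Illustrative sketch (not part of the original source): the duplicate-case
 * trick means the assert only compiles when the size matches.  For a
 * hypothetical packed struct:
 *
 *      struct example { __u32 a; __u16 b; } __attribute__ ((__packed__));
 *      ASSERT_SIZE(example, 6)
 *
 * If sizeof(struct example) != 6, the second label evaluates to 'case 0'
 * and collides with the first one, stopping the build.
 */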

/* Disk configuration info. */
#define IMSM_MAX_DEVICES 255
struct imsm_disk {
        __u8 serial[MAX_RAID_SERIAL_LEN];/* 0xD8 - 0xE7 ascii serial number */
        __u32 total_blocks_lo;           /* 0xE8 - 0xEB total blocks lo */
        __u32 scsi_id;                   /* 0xEC - 0xEF scsi ID */
#define SPARE_DISK      __cpu_to_le32(0x01)  /* Spare */
#define CONFIGURED_DISK __cpu_to_le32(0x02)  /* Member of some RaidDev */
#define FAILED_DISK     __cpu_to_le32(0x04)  /* Permanent failure */
#define JOURNAL_DISK    __cpu_to_le32(0x2000000) /* Device marked as Journaling Drive */
        __u32 status;                    /* 0xF0 - 0xF3 */
        __u32 owner_cfg_num;             /* 0xF4 - 0xF7 which config 0,1,2... owns this disk */
        __u32 total_blocks_hi;           /* 0xF8 - 0xFB total blocks hi */
#define IMSM_DISK_FILLERS       3
        __u32 filler[IMSM_DISK_FILLERS]; /* 0xFC - 0x107 MPB_DISK_FILLERS for future expansion */
};
ASSERT_SIZE(imsm_disk, 48)

/* map selector for map management */
#define MAP_0           0
#define MAP_1           1
#define MAP_X           -1

/* RAID map configuration info. */
struct imsm_map {
        __u32 pba_of_lba0_lo;   /* start address of partition */
        __u32 blocks_per_member_lo;/* blocks per member */
        __u32 num_data_stripes_lo;      /* number of data stripes */
        __u16 blocks_per_strip;
        __u8  map_state;        /* Normal, Uninitialized, Degraded, Failed */
#define IMSM_T_STATE_NORMAL 0
#define IMSM_T_STATE_UNINITIALIZED 1
#define IMSM_T_STATE_DEGRADED 2
#define IMSM_T_STATE_FAILED 3
        __u8  raid_level;
#define IMSM_T_RAID0 0
#define IMSM_T_RAID1 1
#define IMSM_T_RAID5 5          /* since metadata version 1.2.02 ? */
        __u8  num_members;      /* number of member disks */
        __u8  num_domains;      /* number of parity domains */
        __u8  failed_disk_num;  /* valid only when state is degraded */
        __u8  ddf;
        __u32 pba_of_lba0_hi;
        __u32 blocks_per_member_hi;
        __u32 num_data_stripes_hi;
        __u32 filler[4];        /* expansion area */
#define IMSM_ORD_REBUILD (1 << 24)
        __u32 disk_ord_tbl[1];  /* disk_ord_tbl[num_members],
                                 * top byte contains some flags
                                 */
};
ASSERT_SIZE(imsm_map, 52)

struct imsm_vol {
        __u32 curr_migr_unit;
        __u32 checkpoint_id;    /* id to access curr_migr_unit */
        __u8  migr_state;       /* Normal or Migrating */
#define MIGR_INIT 0
#define MIGR_REBUILD 1
#define MIGR_VERIFY 2 /* analogous to echo check > sync_action */
#define MIGR_GEN_MIGR 3
#define MIGR_STATE_CHANGE 4
#define MIGR_REPAIR 5
        __u8  migr_type;        /* Initializing, Rebuilding, ... */
#define RAIDVOL_CLEAN          0
#define RAIDVOL_DIRTY          1
#define RAIDVOL_DSRECORD_VALID 2
        __u8  dirty;
        __u8  fs_state;         /* fast-sync state for CnG (0xff == disabled) */
        __u16 verify_errors;    /* number of mismatches */
        __u16 bad_blocks;       /* number of bad blocks during verify */
        __u32 filler[4];
        struct imsm_map map[1];
        /* here comes another one if migr_state */
};
ASSERT_SIZE(imsm_vol, 84)

struct imsm_dev {
        __u8  volume[MAX_RAID_SERIAL_LEN];
        __u32 size_low;
        __u32 size_high;
#define DEV_BOOTABLE            __cpu_to_le32(0x01)
#define DEV_BOOT_DEVICE         __cpu_to_le32(0x02)
#define DEV_READ_COALESCING     __cpu_to_le32(0x04)
#define DEV_WRITE_COALESCING    __cpu_to_le32(0x08)
#define DEV_LAST_SHUTDOWN_DIRTY __cpu_to_le32(0x10)
#define DEV_HIDDEN_AT_BOOT      __cpu_to_le32(0x20)
#define DEV_CURRENTLY_HIDDEN    __cpu_to_le32(0x40)
#define DEV_VERIFY_AND_FIX      __cpu_to_le32(0x80)
#define DEV_MAP_STATE_UNINIT    __cpu_to_le32(0x100)
#define DEV_NO_AUTO_RECOVERY    __cpu_to_le32(0x200)
#define DEV_CLONE_N_GO          __cpu_to_le32(0x400)
#define DEV_CLONE_MAN_SYNC      __cpu_to_le32(0x800)
#define DEV_CNG_MASTER_DISK_NUM __cpu_to_le32(0x1000)
        __u32 status;   /* Persistent RaidDev status */
        __u32 reserved_blocks; /* Reserved blocks at beginning of volume */
        __u8  migr_priority;
        __u8  num_sub_vols;
        __u8  tid;
        __u8  cng_master_disk;
        __u16 cache_policy;
        __u8  cng_state;
        __u8  cng_sub_state;
        __u16 my_vol_raid_dev_num; /* Used in Unique volume Id for this RaidDev */

        /* NVM_EN */
        __u8 nv_cache_mode;
        __u8 nv_cache_flags;

        /* Unique Volume Id of the NvCache Volume associated with this volume */
        __u32 nvc_vol_orig_family_num;
        __u16 nvc_vol_raid_dev_num;

#define RWH_OFF 0
#define RWH_DISTRIBUTED 1
#define RWH_JOURNALING_DRIVE 2
#define RWH_MULTIPLE_DISTRIBUTED 3
#define RWH_MULTIPLE_PPLS_JOURNALING_DRIVE 4
#define RWH_MULTIPLE_OFF 5
        __u8  rwh_policy; /* Raid Write Hole Policy */
        __u8  jd_serial[MAX_RAID_SERIAL_LEN]; /* Journal Drive serial number */
        __u8  filler1;

#define IMSM_DEV_FILLERS 3
        __u32 filler[IMSM_DEV_FILLERS];
        struct imsm_vol vol;
};
ASSERT_SIZE(imsm_dev, 164)

struct imsm_super {
        __u8 sig[MAX_SIGNATURE_LENGTH]; /* 0x00 - 0x1F */
        __u32 check_sum;                /* 0x20 - 0x23 MPB Checksum */
        __u32 mpb_size;                 /* 0x24 - 0x27 Size of MPB */
        __u32 family_num;               /* 0x28 - 0x2B Checksum from first time this config was written */
        __u32 generation_num;           /* 0x2C - 0x2F Incremented each time this array's MPB is written */
        __u32 error_log_size;           /* 0x30 - 0x33 in bytes */
        __u32 attributes;               /* 0x34 - 0x37 */
        __u8 num_disks;                 /* 0x38 Number of configured disks */
        __u8 num_raid_devs;             /* 0x39 Number of configured volumes */
        __u8 error_log_pos;             /* 0x3A */
        __u8 fill[1];                   /* 0x3B */
        __u32 cache_size;               /* 0x3C - 0x3F in MB */
        __u32 orig_family_num;          /* 0x40 - 0x43 original family num */
        __u32 pwr_cycle_count;          /* 0x44 - 0x47 simulated power cycle count for array */
        __u32 bbm_log_size;             /* 0x48 - 0x4B size of bad Block Mgmt Log in bytes */
        __u16 num_raid_devs_created;    /* 0x4C - 0x4D Used for generating unique
                                         * volume IDs for raid_dev created in this array
                                         * (starts at 1)
                                         */
        __u16 filler1;                  /* 0x4E - 0x4F */
#define IMSM_FILLERS 34
        __u32 filler[IMSM_FILLERS];     /* 0x50 - 0xD7 RAID_MPB_FILLERS */
        struct imsm_disk disk[1];       /* 0xD8 diskTbl[numDisks] */
        /* here comes imsm_dev[num_raid_devs] */
        /* here comes BBM logs */
};
ASSERT_SIZE(imsm_super, 264)

#define BBM_LOG_MAX_ENTRIES 254
#define BBM_LOG_MAX_LBA_ENTRY_VAL 256           /* Represents 256 LBAs */
#define BBM_LOG_SIGNATURE 0xabadb10c

struct bbm_log_block_addr {
        __u16 w1;
        __u32 dw1;
} __attribute__ ((__packed__));

struct bbm_log_entry {
        __u8 marked_count;              /* Number of blocks marked - 1 */
        __u8 disk_ordinal;              /* Disk entry within the imsm_super */
        struct bbm_log_block_addr defective_block_start;
} __attribute__ ((__packed__));

struct bbm_log {
        __u32 signature; /* 0xABADB10C */
        __u32 entry_count;
        struct bbm_log_entry marked_block_entries[BBM_LOG_MAX_ENTRIES];
};
ASSERT_SIZE(bbm_log, 2040)

static char *map_state_str[] = { "normal", "uninitialized", "degraded", "failed" };

#define BLOCKS_PER_KB   (1024/512)

#define RAID_DISK_RESERVED_BLOCKS_IMSM_HI 2209

#define GEN_MIGR_AREA_SIZE 2048 /* General Migration Copy Area size in blocks */

#define MIGR_REC_BUF_SECTORS 1 /* size of migr_record i/o buffer in sectors */
#define MIGR_REC_SECTOR_POSITION 1 /* migr_record position offset on disk,
                                    * MIGR_REC_BUF_SECTORS <= MIGR_REC_SECTOR_POS
                                    */

#define UNIT_SRC_NORMAL     0   /* Source data for curr_migr_unit must
                                 * be recovered using srcMap */
#define UNIT_SRC_IN_CP_AREA 1   /* Source data for curr_migr_unit has
                                 * already been migrated and must
                                 * be recovered from checkpoint area */

#define PPL_ENTRY_SPACE (128 * 1024) /* Size of single PPL, without the header */

struct migr_record {
        __u32 rec_status;           /* Status used to determine how to restart
                                     * migration in case it aborts
                                     * in some fashion */
        __u32 curr_migr_unit_lo;    /* 0..numMigrUnits-1 */
        __u32 family_num;           /* Family number of MPB
                                     * containing the RaidDev
                                     * that is migrating */
        __u32 ascending_migr;       /* True if migrating in increasing
                                     * order of lbas */
        __u32 blocks_per_unit;      /* Num disk blocks per unit of operation */
        __u32 dest_depth_per_unit;  /* Num member blocks each destMap
                                     * member disk
                                     * advances per unit-of-operation */
        __u32 ckpt_area_pba_lo;     /* Pba of first block of ckpt copy area */
        __u32 dest_1st_member_lba_lo;   /* First member lba on first
                                         * stripe of destination */
        __u32 num_migr_units_lo;    /* Total num migration units-of-op */
        __u32 post_migr_vol_cap;    /* Size of volume after
                                     * migration completes */
        __u32 post_migr_vol_cap_hi; /* Expansion space for LBA64 */
        __u32 ckpt_read_disk_num;   /* Which member disk in destSubMap[0] the
                                     * migration ckpt record was read from
                                     * (for recovered migrations) */
        __u32 curr_migr_unit_hi;    /* 0..numMigrUnits-1 high order 32 bits */
        __u32 ckpt_area_pba_hi;     /* Pba of first block of ckpt copy area
                                     * high order 32 bits */
        __u32 dest_1st_member_lba_hi; /* First member lba on first stripe of
                                       * destination - high order 32 bits */
        __u32 num_migr_units_hi;      /* Total num migration units-of-op
                                       * high order 32 bits */
};
ASSERT_SIZE(migr_record, 64)

struct md_list {
        /* usage marker:
         *  1: load metadata
         *  2: metadata does not match
         *  4: already checked
         */
        int   used;
        char  *devname;
        int   found;
        int   container;
        dev_t st_rdev;
        struct md_list *next;
};

#define pr_vrb(fmt, arg...) (void) (verbose && pr_err(fmt, ##arg))

static __u8 migr_type(struct imsm_dev *dev)
{
        if (dev->vol.migr_type == MIGR_VERIFY &&
            dev->status & DEV_VERIFY_AND_FIX)
                return MIGR_REPAIR;
        else
                return dev->vol.migr_type;
}

static void set_migr_type(struct imsm_dev *dev, __u8 migr_type)
{
        /* for compatibility with older oroms convert MIGR_REPAIR into
         * MIGR_VERIFY w/ DEV_VERIFY_AND_FIX status
         */
        if (migr_type == MIGR_REPAIR) {
                dev->vol.migr_type = MIGR_VERIFY;
                dev->status |= DEV_VERIFY_AND_FIX;
        } else {
                dev->vol.migr_type = migr_type;
                dev->status &= ~DEV_VERIFY_AND_FIX;
        }
}

static unsigned int sector_count(__u32 bytes, unsigned int sector_size)
{
        return ROUND_UP(bytes, sector_size) / sector_size;
}
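
/* Worked example (illustrative): with bytes = 1025 and sector_size = 512,
 * ROUND_UP(1025, 512) = 1536, so sector_count() returns 1536 / 512 = 3;
 * an exact multiple is unchanged, e.g. sector_count(1024, 512) == 2.
 */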

static unsigned int mpb_sectors(struct imsm_super *mpb,
                                        unsigned int sector_size)
{
        return sector_count(__le32_to_cpu(mpb->mpb_size), sector_size);
}

struct intel_dev {
        struct imsm_dev *dev;
        struct intel_dev *next;
        unsigned index;
};

struct intel_hba {
        enum sys_dev_type type;
        char *path;
        char *pci_id;
        struct intel_hba *next;
};

enum action {
        DISK_REMOVE = 1,
        DISK_ADD
};

/* internal representation of IMSM metadata */
struct intel_super {
        union {
                void *buf; /* O_DIRECT buffer for reading/writing metadata */
                struct imsm_super *anchor; /* immovable parameters */
        };
        union {
                void *migr_rec_buf; /* buffer for I/O operations */
                struct migr_record *migr_rec; /* migration record */
        };
        int clean_migration_record_by_mdmon; /* when reshape is switched to next
                array, it indicates that mdmon is allowed to clean migration
                record */
        size_t len; /* size of the 'buf' allocation */
        size_t extra_space; /* extra space in 'buf' that is not used yet */
        void *next_buf; /* for realloc'ing buf from the manager */
        size_t next_len;
        int updates_pending; /* count of pending updates for mdmon */
        int current_vol; /* index of raid device undergoing creation */
        unsigned long long create_offset; /* common start for 'current_vol' */
        __u32 random; /* random data for seeding new family numbers */
        struct intel_dev *devlist;
        unsigned int sector_size; /* sector size of used member drives */
        struct dl {
                struct dl *next;
                int index;
                __u8 serial[MAX_RAID_SERIAL_LEN];
                int major, minor;
                char *devname;
                struct imsm_disk disk;
                int fd;
                int extent_cnt;
                struct extent *e; /* for determining freespace @ create */
                int raiddisk; /* slot to fill in autolayout */
                enum action action;
        } *disks, *current_disk;
        struct dl *disk_mgmt_list; /* list of disks to add/remove while mdmon
                                      active */
        struct dl *missing; /* disks removed while we weren't looking */
        struct bbm_log *bbm_log;
        struct intel_hba *hba; /* device path of the raid controller for this metadata */
        const struct imsm_orom *orom; /* platform firmware support */
        struct intel_super *next; /* (temp) list for disambiguating family_num */
        struct md_bb bb;        /* memory for get_bad_blocks call */
};

struct intel_disk {
        struct imsm_disk disk;
        #define IMSM_UNKNOWN_OWNER (-1)
        int owner;
        struct intel_disk *next;
};

struct extent {
        unsigned long long start, size;
};

/* definitions of reshape process types */
enum imsm_reshape_type {
        CH_TAKEOVER,
        CH_MIGRATION,
        CH_ARRAY_SIZE,
};

/* definition of messages passed to imsm_process_update */
enum imsm_update_type {
        update_activate_spare,
        update_create_array,
        update_kill_array,
        update_rename_array,
        update_add_remove_disk,
        update_reshape_container_disks,
        update_reshape_migration,
        update_takeover,
        update_general_migration_checkpoint,
        update_size_change,
        update_prealloc_badblocks_mem,
        update_rwh_policy,
};

struct imsm_update_activate_spare {
        enum imsm_update_type type;
        struct dl *dl;
        int slot;
        int array;
        struct imsm_update_activate_spare *next;
};

struct geo_params {
        char devnm[32];
        char *dev_name;
        unsigned long long size;
        int level;
        int layout;
        int chunksize;
        int raid_disks;
};

enum takeover_direction {
        R10_TO_R0,
        R0_TO_R10
};

struct imsm_update_takeover {
        enum imsm_update_type type;
        int subarray;
        enum takeover_direction direction;
};

struct imsm_update_reshape {
        enum imsm_update_type type;
        int old_raid_disks;
        int new_raid_disks;

        int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

struct imsm_update_reshape_migration {
        enum imsm_update_type type;
        int old_raid_disks;
        int new_raid_disks;
        /* fields for array migration changes */
        int subdev;
        int new_level;
        int new_layout;
        int new_chunksize;

        int new_disks[1]; /* new_raid_disks - old_raid_disks makedev number */
};

struct imsm_update_size_change {
        enum imsm_update_type type;
        int subdev;
        long long new_size;
};

struct imsm_update_general_migration_checkpoint {
        enum imsm_update_type type;
        __u32 curr_migr_unit;
};

struct disk_info {
        __u8 serial[MAX_RAID_SERIAL_LEN];
};

struct imsm_update_create_array {
        enum imsm_update_type type;
        int dev_idx;
        struct imsm_dev dev;
};

struct imsm_update_kill_array {
        enum imsm_update_type type;
        int dev_idx;
};

struct imsm_update_rename_array {
        enum imsm_update_type type;
        __u8 name[MAX_RAID_SERIAL_LEN];
        int dev_idx;
};

struct imsm_update_add_remove_disk {
        enum imsm_update_type type;
};

struct imsm_update_prealloc_bb_mem {
        enum imsm_update_type type;
};

struct imsm_update_rwh_policy {
        enum imsm_update_type type;
        int new_policy;
        int dev_idx;
};

static const char *_sys_dev_type[] = {
        [SYS_DEV_UNKNOWN] = "Unknown",
        [SYS_DEV_SAS] = "SAS",
        [SYS_DEV_SATA] = "SATA",
        [SYS_DEV_NVME] = "NVMe",
        [SYS_DEV_VMD] = "VMD"
};

const char *get_sys_dev_type(enum sys_dev_type type)
{
        if (type >= SYS_DEV_MAX)
                type = SYS_DEV_UNKNOWN;

        return _sys_dev_type[type];
}

static struct intel_hba *alloc_intel_hba(struct sys_dev *device)
{
        struct intel_hba *result = xmalloc(sizeof(*result));

        result->type = device->type;
        result->path = xstrdup(device->path);
        result->next = NULL;
        if (result->path && (result->pci_id = strrchr(result->path, '/')) != NULL)
                result->pci_id++;

        return result;
}

static struct intel_hba *find_intel_hba(struct intel_hba *hba, struct sys_dev *device)
{
        struct intel_hba *result;

        for (result = hba; result; result = result->next) {
                if (result->type == device->type && strcmp(result->path, device->path) == 0)
                        break;
        }
        return result;
}

static int attach_hba_to_super(struct intel_super *super, struct sys_dev *device)
{
        struct intel_hba *hba;

        /* check if the disk is attached to an Intel HBA */
        hba = find_intel_hba(super->hba, device);
        if (hba != NULL)
                return 1;
        /* Check if HBA is already attached to super */
        if (super->hba == NULL) {
                super->hba = alloc_intel_hba(device);
                return 1;
        }

        hba = super->hba;
        /* Intel metadata requires all disks to be attached to HBAs of the
         * same type.  Mixing HBA types is not supported.
         */
        if (device->type != hba->type)
                return 2;

        /* Multiple same type HBAs can be used if they share the same OROM */
        const struct imsm_orom *device_orom = get_orom_by_device_id(device->dev_id);

        if (device_orom != super->orom)
                return 2;

        while (hba->next)
                hba = hba->next;

        hba->next = alloc_intel_hba(device);
        return 1;
}

static struct sys_dev *find_disk_attached_hba(int fd, const char *devname)
{
        struct sys_dev *list, *elem;
        char *disk_path;

        if ((list = find_intel_devices()) == NULL)
                return NULL;

        if (fd < 0)
                disk_path = (char *) devname;
        else
                disk_path = diskfd_to_devpath(fd);

        if (!disk_path)
                return NULL;

        for (elem = list; elem; elem = elem->next)
                if (path_attached_to_hba(disk_path, elem->path))
                        return elem;

        if (disk_path != devname)
                free(disk_path);

        return NULL;
}

static int find_intel_hba_capability(int fd, struct intel_super *super,
                                     char *devname);

static struct supertype *match_metadata_desc_imsm(char *arg)
{
        struct supertype *st;

        if (strcmp(arg, "imsm") != 0 &&
            strcmp(arg, "default") != 0)
                return NULL;

        st = xcalloc(1, sizeof(*st));
        st->ss = &super_imsm;
        st->max_devs = IMSM_MAX_DEVICES;
        st->minor_version = 0;
        st->sb = NULL;
        return st;
}

static __u8 *get_imsm_version(struct imsm_super *mpb)
{
        return &mpb->sig[MPB_SIG_LEN];
}

/* retrieve a disk directly from the anchor when the anchor is known to be
 * up-to-date, currently only at load time
 */
static struct imsm_disk *__get_imsm_disk(struct imsm_super *mpb, __u8 index)
{
        if (index >= mpb->num_disks)
                return NULL;
        return &mpb->disk[index];
}

/* retrieve the disk description based on an index of the disk
 * in the sub-array
 */
static struct dl *get_imsm_dl_disk(struct intel_super *super, __u8 index)
{
        struct dl *d;

        for (d = super->disks; d; d = d->next)
                if (d->index == index)
                        return d;

        return NULL;
}

/* retrieve a disk from the parsed metadata */
static struct imsm_disk *get_imsm_disk(struct intel_super *super, __u8 index)
{
        struct dl *dl;

        dl = get_imsm_dl_disk(super, index);
        if (dl)
                return &dl->disk;

        return NULL;
}

/* generate a checksum directly from the anchor when the anchor is known to be
 * up-to-date, currently only at load or write_super after coalescing
 */
static __u32 __gen_imsm_checksum(struct imsm_super *mpb)
{
        __u32 end = mpb->mpb_size / sizeof(end);
        __u32 *p = (__u32 *) mpb;
        __u32 sum = 0;

        while (end--) {
                sum += __le32_to_cpu(*p);
                p++;
        }

        return sum - __le32_to_cpu(mpb->check_sum);
}
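
/* Illustrative note (not from the original source): the sum runs over every
 * 32-bit word of the MPB, including the stored check_sum field, which is
 * then subtracted back out.  A hypothetical verification could look like:
 *
 *      __u32 expected = __gen_imsm_checksum(mpb);
 *      if (expected != __le32_to_cpu(mpb->check_sum))
 *              ; // anchor is corrupt or torn
 */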

static size_t sizeof_imsm_map(struct imsm_map *map)
{
        return sizeof(struct imsm_map) + sizeof(__u32) * (map->num_members - 1);
}

struct imsm_map *get_imsm_map(struct imsm_dev *dev, int second_map)
{
        /* A device can have 2 maps if it is in the middle of a migration.
         * If second_map is:
         *    MAP_0 - we return the first map
         *    MAP_1 - we return the second map if it exists, else NULL
         *    MAP_X - we return the second map if it exists, else the first
         */
        struct imsm_map *map = &dev->vol.map[0];
        struct imsm_map *map2 = NULL;

        if (dev->vol.migr_state)
                map2 = (void *)map + sizeof_imsm_map(map);

        switch (second_map) {
        case MAP_0:
                break;
        case MAP_1:
                map = map2;
                break;
        case MAP_X:
                if (map2)
                        map = map2;
                break;
        default:
                map = NULL;
        }
        return map;
}
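
/* Illustrative layout note (drawn from the code above): during a migration
 * the two maps are packed back to back, so the second one starts right
 * after the variable-length first one:
 *
 *      struct imsm_map *m0 = get_imsm_map(dev, MAP_0);
 *      struct imsm_map *m1 = (void *)m0 + sizeof_imsm_map(m0);
 *      // m1 is only meaningful when dev->vol.migr_state is set
 */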

/* return the size of the device.
 * migr_state increases the returned size if map[0] were to be duplicated
 */
static size_t sizeof_imsm_dev(struct imsm_dev *dev, int migr_state)
{
        size_t size = sizeof(*dev) - sizeof(struct imsm_map) +
                      sizeof_imsm_map(get_imsm_map(dev, MAP_0));

        /* migrating means an additional map */
        if (dev->vol.migr_state)
                size += sizeof_imsm_map(get_imsm_map(dev, MAP_1));
        else if (migr_state)
                size += sizeof_imsm_map(get_imsm_map(dev, MAP_0));

        return size;
}
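
/* Worked example (illustrative): for a clean (non-migrating) two-disk
 * volume, sizeof_imsm_dev(dev, 0) = sizeof(struct imsm_dev) -
 * sizeof(struct imsm_map) + sizeof_imsm_map(map with a 2-entry
 * disk_ord_tbl) = 164 - 52 + 56 = 168 bytes.
 */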

/* retrieve disk serial number list from a metadata update */
static struct disk_info *get_disk_info(struct imsm_update_create_array *update)
{
        void *u = update;
        struct disk_info *inf;

        inf = u + sizeof(*update) - sizeof(struct imsm_dev) +
              sizeof_imsm_dev(&update->dev, 0);

        return inf;
}

static struct imsm_dev *__get_imsm_dev(struct imsm_super *mpb, __u8 index)
{
        int offset;
        int i;
        void *_mpb = mpb;

        if (index >= mpb->num_raid_devs)
                return NULL;

        /* devices start after all disks */
        offset = ((void *) &mpb->disk[mpb->num_disks]) - _mpb;

        for (i = 0; i <= index; i++)
                if (i == index)
                        return _mpb + offset;
                else
                        offset += sizeof_imsm_dev(_mpb + offset, 0);

        return NULL;
}
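
/* Illustrative note: raid devices are variable-length records packed after
 * the disk table, so lookup is a linear walk; e.g. the record for index 1
 * begins sizeof_imsm_dev(dev0, 0) bytes past the end of the disk table.
 */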

static struct imsm_dev *get_imsm_dev(struct intel_super *super, __u8 index)
{
        struct intel_dev *dv;

        if (index >= super->anchor->num_raid_devs)
                return NULL;
        for (dv = super->devlist; dv; dv = dv->next)
                if (dv->index == index)
                        return dv->dev;
        return NULL;
}

static inline unsigned long long __le48_to_cpu(const struct bbm_log_block_addr
                                               *addr)
{
        return ((((__u64)__le32_to_cpu(addr->dw1)) << 16) |
                __le16_to_cpu(addr->w1));
}

static inline struct bbm_log_block_addr __cpu_to_le48(unsigned long long sec)
{
        struct bbm_log_block_addr addr;

        addr.w1 = __cpu_to_le16((__u16)(sec & 0xffff));
        addr.dw1 = __cpu_to_le32((__u32)(sec >> 16) & 0xffffffff);
        return addr;
}
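
/* Worked example (illustrative): for sector 0x123456789AB the 48-bit split
 * stores w1 = 0x89ab (low 16 bits) and dw1 = 0x12345678 (high 32 bits);
 * __le48_to_cpu() then reassembles (0x12345678 << 16) | 0x89ab.
 */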

/* get size of the bbm log */
static __u32 get_imsm_bbm_log_size(struct bbm_log *log)
{
        if (!log || log->entry_count == 0)
                return 0;

        return sizeof(log->signature) +
                sizeof(log->entry_count) +
                log->entry_count * sizeof(struct bbm_log_entry);
}

/* find a bad-block log entry for this disk that lies entirely within the
 * given range, starting the scan at *pos; on success *pos is set to the
 * matching entry
 */
static int is_stored_in_bbm(struct bbm_log *log, const __u8 idx, const unsigned
                            long long sector, const int length, __u32 *pos)
{
        __u32 i;

        for (i = *pos; i < log->entry_count; i++) {
                struct bbm_log_entry *entry = &log->marked_block_entries[i];
                unsigned long long bb_start;
                unsigned long long bb_end;

                bb_start = __le48_to_cpu(&entry->defective_block_start);
                bb_end = bb_start + (entry->marked_count + 1);

                if ((entry->disk_ordinal == idx) && (bb_start >= sector) &&
                    (bb_end <= sector + length)) {
                        *pos = i;
                        return 1;
                }
        }
        return 0;
}

/* record new bad block in bbm log */
static int record_new_badblock(struct bbm_log *log, const __u8 idx, unsigned
                               long long sector, int length)
{
        int new_bb = 0;
        __u32 pos = 0;
        struct bbm_log_entry *entry = NULL;

        while (is_stored_in_bbm(log, idx, sector, length, &pos)) {
                struct bbm_log_entry *e = &log->marked_block_entries[pos];

                if ((e->marked_count + 1 == BBM_LOG_MAX_LBA_ENTRY_VAL) &&
                    (__le48_to_cpu(&e->defective_block_start) == sector)) {
                        sector += BBM_LOG_MAX_LBA_ENTRY_VAL;
                        length -= BBM_LOG_MAX_LBA_ENTRY_VAL;
                        pos = pos + 1;
                        continue;
                }
                entry = e;
                break;
        }

        if (entry) {
                int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
                        BBM_LOG_MAX_LBA_ENTRY_VAL;
                entry->defective_block_start = __cpu_to_le48(sector);
                entry->marked_count = cnt - 1;
                if (cnt == length)
                        return 1;
                sector += cnt;
                length -= cnt;
        }

        new_bb = ROUND_UP(length, BBM_LOG_MAX_LBA_ENTRY_VAL) /
                BBM_LOG_MAX_LBA_ENTRY_VAL;
        if (log->entry_count + new_bb > BBM_LOG_MAX_ENTRIES)
                return 0;

        while (length > 0) {
                int cnt = (length <= BBM_LOG_MAX_LBA_ENTRY_VAL) ? length :
                        BBM_LOG_MAX_LBA_ENTRY_VAL;
                struct bbm_log_entry *entry =
                        &log->marked_block_entries[log->entry_count];

                entry->defective_block_start = __cpu_to_le48(sector);
                entry->marked_count = cnt - 1;
                entry->disk_ordinal = idx;

                sector += cnt;
                length -= cnt;

                log->entry_count++;
        }

        return new_bb;
}
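
/* Illustrative sketch: a request longer than BBM_LOG_MAX_LBA_ENTRY_VAL is
 * split into multiple entries; e.g. recording 600 bad blocks at sector S
 * (assuming no overlap with existing entries) creates
 * ROUND_UP(600, 256) / 256 = 3 entries covering 256 + 256 + 88 blocks.
 */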

/* clear all bad blocks for given disk */
static void clear_disk_badblocks(struct bbm_log *log, const __u8 idx)
{
        __u32 i = 0;

        while (i < log->entry_count) {
                struct bbm_log_entry *entries = log->marked_block_entries;

                if (entries[i].disk_ordinal == idx) {
                        if (i < log->entry_count - 1)
                                entries[i] = entries[log->entry_count - 1];
                        log->entry_count--;
                } else {
                        i++;
                }
        }
}

/* clear given bad block */
static int clear_badblock(struct bbm_log *log, const __u8 idx, const unsigned
                          long long sector, const int length)
{
        __u32 i = 0;

        while (i < log->entry_count) {
                struct bbm_log_entry *entries = log->marked_block_entries;

                if ((entries[i].disk_ordinal == idx) &&
                    (__le48_to_cpu(&entries[i].defective_block_start) ==
                     sector) && (entries[i].marked_count + 1 == length)) {
                        if (i < log->entry_count - 1)
                                entries[i] = entries[log->entry_count - 1];
                        log->entry_count--;
                        break;
                }
                i++;
        }

        return 1;
}

/* allocate and load BBM log from metadata */
static int load_bbm_log(struct intel_super *super)
{
        struct imsm_super *mpb = super->anchor;
        __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);

        super->bbm_log = xcalloc(1, sizeof(struct bbm_log));
        if (!super->bbm_log)
                return 1;

        if (bbm_log_size) {
                struct bbm_log *log = (void *)mpb +
                        __le32_to_cpu(mpb->mpb_size) - bbm_log_size;

                __u32 entry_count;

                if (bbm_log_size < sizeof(log->signature) +
                    sizeof(log->entry_count))
                        return 2;

                entry_count = __le32_to_cpu(log->entry_count);
                if ((__le32_to_cpu(log->signature) != BBM_LOG_SIGNATURE) ||
                    (entry_count > BBM_LOG_MAX_ENTRIES))
                        return 3;

                if (bbm_log_size !=
                    sizeof(log->signature) + sizeof(log->entry_count) +
                    entry_count * sizeof(struct bbm_log_entry))
                        return 4;

                memcpy(super->bbm_log, log, bbm_log_size);
        } else {
                super->bbm_log->signature = __cpu_to_le32(BBM_LOG_SIGNATURE);
                super->bbm_log->entry_count = 0;
        }

        return 0;
}

/* checks if bad block is within volume boundaries */
static int is_bad_block_in_volume(const struct bbm_log_entry *entry,
                        const unsigned long long start_sector,
                        const unsigned long long size)
{
        unsigned long long bb_start;
        unsigned long long bb_end;

        bb_start = __le48_to_cpu(&entry->defective_block_start);
        bb_end = bb_start + (entry->marked_count + 1);

        if (((bb_start >= start_sector) && (bb_start < start_sector + size)) ||
            ((bb_end >= start_sector) && (bb_end <= start_sector + size)))
                return 1;

        return 0;
}

/* get list of bad blocks on a drive for a volume */
static void get_volume_badblocks(const struct bbm_log *log, const __u8 idx,
                        const unsigned long long start_sector,
                        const unsigned long long size,
                        struct md_bb *bbs)
{
        __u32 count = 0;
        __u32 i;

        for (i = 0; i < log->entry_count; i++) {
                const struct bbm_log_entry *ent =
                        &log->marked_block_entries[i];
                struct md_bb_entry *bb;

                if ((ent->disk_ordinal == idx) &&
                    is_bad_block_in_volume(ent, start_sector, size)) {

                        if (!bbs->entries) {
                                bbs->entries = xmalloc(BBM_LOG_MAX_ENTRIES *
                                                       sizeof(*bb));
                                if (!bbs->entries)
                                        break;
                        }

                        bb = &bbs->entries[count++];
                        bb->sector = __le48_to_cpu(&ent->defective_block_start);
                        bb->length = ent->marked_count + 1;
                }
        }
        bbs->count = count;
}

/*
 * for second_map:
 *  == MAP_0 get first map
 *  == MAP_1 get second map
 *  == MAP_X then get map according to the current migr_state
 */
static __u32 get_imsm_ord_tbl_ent(struct imsm_dev *dev,
                                  int slot,
                                  int second_map)
{
        struct imsm_map *map;

        map = get_imsm_map(dev, second_map);

        /* top byte identifies disk under rebuild */
        return __le32_to_cpu(map->disk_ord_tbl[slot]);
}

#define ord_to_idx(ord) (((ord) << 8) >> 8)
static __u32 get_imsm_disk_idx(struct imsm_dev *dev, int slot, int second_map)
{
        __u32 ord = get_imsm_ord_tbl_ent(dev, slot, second_map);

        return ord_to_idx(ord);
}
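
/* Worked example (illustrative): an ordinal of (IMSM_ORD_REBUILD | 3),
 * i.e. 0x01000003, marks disk index 3 as under rebuild; ord_to_idx()
 * shifts the flag byte out of the 32-bit value and returns 3.
 */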

static void set_imsm_ord_tbl_ent(struct imsm_map *map, int slot, __u32 ord)
{
        map->disk_ord_tbl[slot] = __cpu_to_le32(ord);
}

static int get_imsm_disk_slot(struct imsm_map *map, unsigned idx)
{
        int slot;
        __u32 ord;

        for (slot = 0; slot < map->num_members; slot++) {
                ord = __le32_to_cpu(map->disk_ord_tbl[slot]);
                if (ord_to_idx(ord) == idx)
                        return slot;
        }

        return -1;
}

static int get_imsm_raid_level(struct imsm_map *map)
{
        if (map->raid_level == 1) {
                if (map->num_members == 2)
                        return 1;
                else
                        return 10;
        }

        return map->raid_level;
}

static int cmp_extent(const void *av, const void *bv)
{
        const struct extent *a = av;
        const struct extent *b = bv;

        if (a->start < b->start)
                return -1;
        if (a->start > b->start)
                return 1;
        return 0;
}

static int count_memberships(struct dl *dl, struct intel_super *super)
{
        int memberships = 0;
        int i;

        for (i = 0; i < super->anchor->num_raid_devs; i++) {
                struct imsm_dev *dev = get_imsm_dev(super, i);
                struct imsm_map *map = get_imsm_map(dev, MAP_0);

                if (get_imsm_disk_slot(map, dl->index) >= 0)
                        memberships++;
        }

        return memberships;
}

static __u32 imsm_min_reserved_sectors(struct intel_super *super);

static int split_ull(unsigned long long n, void *lo, void *hi)
{
        if (lo == 0 || hi == 0)
                return 1;
        __put_unaligned32(__cpu_to_le32((__u32)n), lo);
        __put_unaligned32(__cpu_to_le32((n >> 32)), hi);
        return 0;
}

static unsigned long long join_u32(__u32 lo, __u32 hi)
{
        return (unsigned long long)__le32_to_cpu(lo) |
               (((unsigned long long)__le32_to_cpu(hi)) << 32);
}
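
/* Illustrative round trip: split_ull() stores n = 0x100000002ULL as
 * lo = cpu_to_le32(0x00000002) and hi = cpu_to_le32(0x00000001), and
 * join_u32(lo, hi) returns the original 64-bit value:
 *
 *      __u32 lo, hi;
 *      split_ull(0x100000002ULL, &lo, &hi);
 *      // join_u32(lo, hi) == 0x100000002ULL
 */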

static unsigned long long total_blocks(struct imsm_disk *disk)
{
        if (disk == NULL)
                return 0;
        return join_u32(disk->total_blocks_lo, disk->total_blocks_hi);
}

static unsigned long long pba_of_lba0(struct imsm_map *map)
{
        if (map == NULL)
                return 0;
        return join_u32(map->pba_of_lba0_lo, map->pba_of_lba0_hi);
}

static unsigned long long blocks_per_member(struct imsm_map *map)
{
        if (map == NULL)
                return 0;
        return join_u32(map->blocks_per_member_lo, map->blocks_per_member_hi);
}

static unsigned long long num_data_stripes(struct imsm_map *map)
{
        if (map == NULL)
                return 0;
        return join_u32(map->num_data_stripes_lo, map->num_data_stripes_hi);
}

static unsigned long long imsm_dev_size(struct imsm_dev *dev)
{
        if (dev == NULL)
                return 0;
        return join_u32(dev->size_low, dev->size_high);
}

static unsigned long long migr_chkp_area_pba(struct migr_record *migr_rec)
{
        if (migr_rec == NULL)
                return 0;
        return join_u32(migr_rec->ckpt_area_pba_lo,
                        migr_rec->ckpt_area_pba_hi);
}

static unsigned long long current_migr_unit(struct migr_record *migr_rec)
{
        if (migr_rec == NULL)
                return 0;
        return join_u32(migr_rec->curr_migr_unit_lo,
                        migr_rec->curr_migr_unit_hi);
}

static unsigned long long migr_dest_1st_member_lba(struct migr_record *migr_rec)
{
        if (migr_rec == NULL)
                return 0;
        return join_u32(migr_rec->dest_1st_member_lba_lo,
                        migr_rec->dest_1st_member_lba_hi);
}

static unsigned long long get_num_migr_units(struct migr_record *migr_rec)
{
        if (migr_rec == NULL)
                return 0;
        return join_u32(migr_rec->num_migr_units_lo,
                        migr_rec->num_migr_units_hi);
}

static void set_total_blocks(struct imsm_disk *disk, unsigned long long n)
{
        split_ull(n, &disk->total_blocks_lo, &disk->total_blocks_hi);
}

static void set_pba_of_lba0(struct imsm_map *map, unsigned long long n)
{
        split_ull(n, &map->pba_of_lba0_lo, &map->pba_of_lba0_hi);
}

static void set_blocks_per_member(struct imsm_map *map, unsigned long long n)
{
        split_ull(n, &map->blocks_per_member_lo, &map->blocks_per_member_hi);
}

static void set_num_data_stripes(struct imsm_map *map, unsigned long long n)
{
        split_ull(n, &map->num_data_stripes_lo, &map->num_data_stripes_hi);
}

static void set_imsm_dev_size(struct imsm_dev *dev, unsigned long long n)
{
        split_ull(n, &dev->size_low, &dev->size_high);
}

static void set_migr_chkp_area_pba(struct migr_record *migr_rec,
                                   unsigned long long n)
{
        split_ull(n, &migr_rec->ckpt_area_pba_lo, &migr_rec->ckpt_area_pba_hi);
}

static void set_current_migr_unit(struct migr_record *migr_rec,
                                  unsigned long long n)
{
        split_ull(n, &migr_rec->curr_migr_unit_lo,
                  &migr_rec->curr_migr_unit_hi);
}

static void set_migr_dest_1st_member_lba(struct migr_record *migr_rec,
                                         unsigned long long n)
{
        split_ull(n, &migr_rec->dest_1st_member_lba_lo,
                  &migr_rec->dest_1st_member_lba_hi);
}

static void set_num_migr_units(struct migr_record *migr_rec,
                               unsigned long long n)
{
        split_ull(n, &migr_rec->num_migr_units_lo,
                  &migr_rec->num_migr_units_hi);
}

static unsigned long long per_dev_array_size(struct imsm_map *map)
{
        unsigned long long array_size = 0;

        if (map == NULL)
                return array_size;

        array_size = num_data_stripes(map) * map->blocks_per_strip;
        if (get_imsm_raid_level(map) == 1 || get_imsm_raid_level(map) == 10)
                array_size *= 2;

        return array_size;
}
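
/* Worked example (illustrative): with num_data_stripes = 100 and
 * blocks_per_strip = 128, the product is 12800 blocks; for RAID1 or
 * RAID10 maps the result is doubled to 25600.
 */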

static struct extent *get_extents(struct intel_super *super, struct dl *dl,
                                  int get_minimal_reservation)
{
        /* find a list of used extents on the given physical device */
        struct extent *rv, *e;
        int i;
        int memberships = count_memberships(dl, super);
        __u32 reservation;

        /* trim the reserved area for spares, so they can join any array
         * regardless of whether the OROM has assigned sectors from the
         * IMSM_RESERVED_SECTORS region
         */
        if (dl->index == -1 || get_minimal_reservation)
                reservation = imsm_min_reserved_sectors(super);
        else
                reservation = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

        rv = xcalloc(sizeof(struct extent), (memberships + 1));
        e = rv;

        for (i = 0; i < super->anchor->num_raid_devs; i++) {
                struct imsm_dev *dev = get_imsm_dev(super, i);
                struct imsm_map *map = get_imsm_map(dev, MAP_0);

                if (get_imsm_disk_slot(map, dl->index) >= 0) {
                        e->start = pba_of_lba0(map);
                        e->size = per_dev_array_size(map);
                        e++;
                }
        }
        qsort(rv, memberships, sizeof(*rv), cmp_extent);

        /* determine the start of the metadata
         * when no raid devices are defined use the default
         * ...otherwise allow the metadata to truncate the value
         * as is the case with older versions of imsm
         */
        if (memberships) {
                struct extent *last = &rv[memberships - 1];
                unsigned long long remainder;

                remainder = total_blocks(&dl->disk) - (last->start + last->size);
                /* round down to 1k block to satisfy precision of the kernel
                 * 'size' interface
                 */
                remainder &= ~1UL;
                /* make sure remainder is still sane */
                if (remainder < (unsigned)ROUND_UP(super->len, 512) >> 9)
                        remainder = ROUND_UP(super->len, 512) >> 9;
                if (reservation > remainder)
                        reservation = remainder;
        }
        e->start = total_blocks(&dl->disk) - reservation;
        e->size = 0;
        return rv;
}

/* try to determine how much space is reserved for metadata from
 * the last get_extents() entry, otherwise fall back to the
 * default
 */
static __u32 imsm_reserved_sectors(struct intel_super *super, struct dl *dl)
{
        struct extent *e;
        int i;
        __u32 rv;

        /* for spares just return a minimal reservation which will grow
         * once the spare is picked up by an array
         */
        if (dl->index == -1)
                return MPB_SECTOR_CNT;

        e = get_extents(super, dl, 0);
        if (!e)
                return MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;

        /* scroll to last entry */
        for (i = 0; e[i].size; i++)
                continue;

        rv = total_blocks(&dl->disk) - e[i].start;

        free(e);

        return rv;
}

static int is_spare(struct imsm_disk *disk)
{
        return (disk->status & SPARE_DISK) == SPARE_DISK;
}

static int is_configured(struct imsm_disk *disk)
{
        return (disk->status & CONFIGURED_DISK) == CONFIGURED_DISK;
}

static int is_failed(struct imsm_disk *disk)
{
        return (disk->status & FAILED_DISK) == FAILED_DISK;
}

static int is_journal(struct imsm_disk *disk)
{
        return (disk->status & JOURNAL_DISK) == JOURNAL_DISK;
}

/* round array size down to the nearest MB and ensure it splits evenly
 * between members
 */
static unsigned long long round_size_to_mb(unsigned long long size, unsigned int
                                           disk_count)
{
        size /= disk_count;
        size = (size >> SECT_PER_MB_SHIFT) << SECT_PER_MB_SHIFT;
        size *= disk_count;

        return size;
}
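
/* Worked example (illustrative): with SECT_PER_MB_SHIFT = 11 (2048 sectors
 * per MB), round_size_to_mb(5000, 2) first divides to 2500 sectors per
 * member, truncates to 2048 (a whole MB), and returns 2048 * 2 = 4096.
 */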

static int able_to_resync(int raid_level, int missing_disks)
{
        int max_missing_disks = 0;

        switch (raid_level) {
        case 10:
                max_missing_disks = 1;
                break;
        default:
                max_missing_disks = 0;
        }
        return missing_disks <= max_missing_disks;
}

/* try to determine how much space is reserved for metadata from
 * the last get_extents() entry on the smallest active disk,
 * otherwise fall back to the default
 */
static __u32 imsm_min_reserved_sectors(struct intel_super *super)
{
        struct extent *e;
        int i;
        unsigned long long min_active;
        __u32 remainder;
        __u32 rv = MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS;
        struct dl *dl, *dl_min = NULL;

        if (!super)
                return rv;

        min_active = 0;
        for (dl = super->disks; dl; dl = dl->next) {
                if (dl->index < 0)
                        continue;
                unsigned long long blocks = total_blocks(&dl->disk);
                if (blocks < min_active || min_active == 0) {
                        dl_min = dl;
                        min_active = blocks;
                }
        }
        if (!dl_min)
                return rv;

        /* find last lba used by subarrays on the smallest active disk */
        e = get_extents(super, dl_min, 0);
        if (!e)
                return rv;
        for (i = 0; e[i].size; i++)
                continue;

        remainder = min_active - e[i].start;
        free(e);

        /* to give priority to recovery we should not require full
         * IMSM_RESERVED_SECTORS from the spare
         */
        rv = MPB_SECTOR_CNT + NUM_BLOCKS_DIRTY_STRIPE_REGION;

        /* if the real reservation is smaller, use that value */
        return (remainder < rv) ? remainder : rv;
}

/*
 * Return minimum size of a spare and sector size
 * that can be used in this array
 */
int get_spare_criteria_imsm(struct supertype *st, struct spare_criteria *c)
{
        struct intel_super *super = st->sb;
        struct dl *dl;
        struct extent *e;
        int i;
        unsigned long long size = 0;

        c->min_size = 0;
        c->sector_size = 0;

        if (!super)
                return -EINVAL;
        /* find first active disk in array */
        dl = super->disks;
        while (dl && (is_failed(&dl->disk) || dl->index == -1))
                dl = dl->next;
        if (!dl)
                return -EINVAL;
        /* find last lba used by subarrays */
        e = get_extents(super, dl, 0);
        if (!e)
                return -EINVAL;
        for (i = 0; e[i].size; i++)
                continue;
        if (i > 0)
                size = e[i-1].start + e[i-1].size;
        free(e);

        /* add the amount of space needed for metadata */
        size += imsm_min_reserved_sectors(super);

        c->min_size = size * 512;
        c->sector_size = super->sector_size;

        return 0;
}

1561 static int is_gen_migration(struct imsm_dev *dev);
1562
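/* ratio between a 512-byte-sector count and a 4k-sector count (4096 / 512) */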
1563 #define IMSM_4K_DIV 8
1564
1565 static __u64 blocks_per_migr_unit(struct intel_super *super,
1566                                   struct imsm_dev *dev);
1567
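/* pretty-print one volume (imsm_dev) for --examine, showing both maps
 * (current <-- previous) while a migration is in progress
 */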
1568 static void print_imsm_dev(struct intel_super *super,
1569                            struct imsm_dev *dev,
1570                            char *uuid,
1571                            int disk_idx)
1572 {
1573         __u64 sz;
1574         int slot, i;
1575         struct imsm_map *map = get_imsm_map(dev, MAP_0);
1576         struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
1577         __u32 ord;
1578
1579         printf("\n");
1580         printf("[%.16s]:\n", dev->volume);
1581         printf("           UUID : %s\n", uuid);
1582         printf("     RAID Level : %d", get_imsm_raid_level(map));
1583         if (map2)
1584                 printf(" <-- %d", get_imsm_raid_level(map2));
1585         printf("\n");
1586         printf("        Members : %d", map->num_members);
1587         if (map2)
1588                 printf(" <-- %d", map2->num_members);
1589         printf("\n");
1590         printf("          Slots : [");
1591         for (i = 0; i < map->num_members; i++) {
1592                 ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
1593                 printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
1594         }
1595         printf("]");
1596         if (map2) {
1597                 printf(" <-- [");
1598                 for (i = 0; i < map2->num_members; i++) {
1599                         ord = get_imsm_ord_tbl_ent(dev, i, MAP_1);
1600                         printf("%s", ord & IMSM_ORD_REBUILD ? "_" : "U");
1601                 }
1602                 printf("]");
1603         }
1604         printf("\n");
1605         printf("    Failed disk : ");
1606         if (map->failed_disk_num == 0xff)
1607                 printf("none");
1608         else
1609                 printf("%i", map->failed_disk_num);
1610         printf("\n");
1611         slot = get_imsm_disk_slot(map, disk_idx);
1612         if (slot >= 0) {
1613                 ord = get_imsm_ord_tbl_ent(dev, slot, MAP_X);
1614                 printf("      This Slot : %d%s\n", slot,
1615                        ord & IMSM_ORD_REBUILD ? " (out-of-sync)" : "");
1616         } else
1617                 printf("      This Slot : ?\n");
1618         printf("    Sector Size : %u\n", super->sector_size);
1619         sz = imsm_dev_size(dev);
1620         printf("     Array Size : %llu%s\n",
1621                (unsigned long long)sz * 512 / super->sector_size,
1622                human_size(sz * 512));
1623         sz = blocks_per_member(map);
1624         printf("   Per Dev Size : %llu%s\n",
1625                (unsigned long long)sz * 512 / super->sector_size,
1626                human_size(sz * 512));
1627         printf("  Sector Offset : %llu\n",
1628                 pba_of_lba0(map));
1629         printf("    Num Stripes : %llu\n",
1630                 num_data_stripes(map));
1631         printf("     Chunk Size : %u KiB",
1632                 __le16_to_cpu(map->blocks_per_strip) / 2);
1633         if (map2)
1634                 printf(" <-- %u KiB",
1635                         __le16_to_cpu(map2->blocks_per_strip) / 2);
1636         printf("\n");
1637         printf("       Reserved : %d\n", __le32_to_cpu(dev->reserved_blocks));
1638         printf("  Migrate State : ");
1639         if (dev->vol.migr_state) {
1640                 if (migr_type(dev) == MIGR_INIT)
1641                         printf("initialize\n");
1642                 else if (migr_type(dev) == MIGR_REBUILD)
1643                         printf("rebuild\n");
1644                 else if (migr_type(dev) == MIGR_VERIFY)
1645                         printf("check\n");
1646                 else if (migr_type(dev) == MIGR_GEN_MIGR)
1647                         printf("general migration\n");
1648                 else if (migr_type(dev) == MIGR_STATE_CHANGE)
1649                         printf("state change\n");
1650                 else if (migr_type(dev) == MIGR_REPAIR)
1651                         printf("repair\n");
1652                 else
1653                         printf("<unknown:%d>\n", migr_type(dev));
1654         } else
1655                 printf("idle\n");
1656         printf("      Map State : %s", map_state_str[map->map_state]);
1657         if (dev->vol.migr_state) {
1658                 struct imsm_map *map = get_imsm_map(dev, MAP_1);
1659
1660                 printf(" <-- %s", map_state_str[map->map_state]);
1661                 printf("\n     Checkpoint : %u ",
1662                            __le32_to_cpu(dev->vol.curr_migr_unit));
1663                 if (is_gen_migration(dev) && (slot > 1 || slot < 0))
1664                         printf("(N/A)");
1665                 else
1666                         printf("(%llu)", (unsigned long long)
1667                                    blocks_per_migr_unit(super, dev));
1668         }
1669         printf("\n");
1670         printf("    Dirty State : %s\n", (dev->vol.dirty & RAIDVOL_DIRTY) ?
1671                                          "dirty" : "clean");
1672         printf("     RWH Policy : ");
1673         if (dev->rwh_policy == RWH_OFF || dev->rwh_policy == RWH_MULTIPLE_OFF)
1674                 printf("off\n");
1675         else if (dev->rwh_policy == RWH_DISTRIBUTED)
1676                 printf("PPL distributed\n");
1677         else if (dev->rwh_policy == RWH_JOURNALING_DRIVE)
1678                 printf("PPL journaling drive\n");
1679         else if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
1680                 printf("Multiple distributed PPLs\n");
1681         else if (dev->rwh_policy == RWH_MULTIPLE_PPLS_JOURNALING_DRIVE)
1682                 printf("Multiple PPLs on journaling drive\n");
1683         else
1684                 printf("<unknown:%d>\n", dev->rwh_policy);
1685 }
1686
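/* print serial, state and usable size of one member disk; an index of -1
 * denotes a disk that holds no container slot (e.g. a spare)
 */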
1687 static void print_imsm_disk(struct imsm_disk *disk,
1688                             int index,
1689                             __u32 reserved,
1690                             unsigned int sector_size) {
1691         char str[MAX_RAID_SERIAL_LEN + 1];
1692         __u64 sz;
1693
1694         if (index < -1 || !disk)
1695                 return;
1696
1697         printf("\n");
1698         snprintf(str, MAX_RAID_SERIAL_LEN + 1, "%s", disk->serial);
1699         if (index >= 0)
1700                 printf("  Disk%02d Serial : %s\n", index, str);
1701         else
1702                 printf("    Disk Serial : %s\n", str);
1703         printf("          State :%s%s%s%s\n", is_spare(disk) ? " spare" : "",
1704                                               is_configured(disk) ? " active" : "",
1705                                               is_failed(disk) ? " failed" : "",
1706                                               is_journal(disk) ? " journal" : "");
1707         printf("             Id : %08x\n", __le32_to_cpu(disk->scsi_id));
1708         sz = total_blocks(disk) - reserved;
1709         printf("    Usable Size : %llu%s\n",
1710                (unsigned long long)sz * 512 / sector_size,
1711                human_size(sz * 512));
1712 }
1713
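/* scale the migration record fields from 512-byte-sector units down to
 * 4k-sector units, for 4k-native disks
 */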
1714 void convert_to_4k_imsm_migr_rec(struct intel_super *super)
1715 {
1716         struct migr_record *migr_rec = super->migr_rec;
1717
1718         migr_rec->blocks_per_unit /= IMSM_4K_DIV;
1719         migr_rec->dest_depth_per_unit /= IMSM_4K_DIV;
1720         split_ull((join_u32(migr_rec->post_migr_vol_cap,
1721                  migr_rec->post_migr_vol_cap_hi) / IMSM_4K_DIV),
1722                  &migr_rec->post_migr_vol_cap, &migr_rec->post_migr_vol_cap_hi);
1723         set_migr_chkp_area_pba(migr_rec,
1724                  migr_chkp_area_pba(migr_rec) / IMSM_4K_DIV);
1725         set_migr_dest_1st_member_lba(migr_rec,
1726                  migr_dest_1st_member_lba(migr_rec) / IMSM_4K_DIV);
1727 }
1728
1729 void convert_to_4k_imsm_disk(struct imsm_disk *disk)
1730 {
1731         set_total_blocks(disk, (total_blocks(disk)/IMSM_4K_DIV));
1732 }
1733
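/* scale every size/lba field in the anchor (disks, volumes, maps and the
 * bbm log) from 512-byte-sector units down to 4k-sector units, then
 * regenerate the checksum
 */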
1734 void convert_to_4k(struct intel_super *super)
1735 {
1736         struct imsm_super *mpb = super->anchor;
1737         struct imsm_disk *disk;
1738         int i;
1739         __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
1740
1741         for (i = 0; i < mpb->num_disks ; i++) {
1742                 disk = __get_imsm_disk(mpb, i);
1743                 /* disk */
1744                 convert_to_4k_imsm_disk(disk);
1745         }
1746         for (i = 0; i < mpb->num_raid_devs; i++) {
1747                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1748                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1749                 /* dev */
1750                 set_imsm_dev_size(dev, imsm_dev_size(dev)/IMSM_4K_DIV);
1751                 dev->vol.curr_migr_unit /= IMSM_4K_DIV;
1752
1753                 /* map0 */
1754                 set_blocks_per_member(map, blocks_per_member(map)/IMSM_4K_DIV);
1755                 map->blocks_per_strip /= IMSM_4K_DIV;
1756                 set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);
1757
1758                 if (dev->vol.migr_state) {
1759                         /* map1 */
1760                         map = get_imsm_map(dev, MAP_1);
1761                         set_blocks_per_member(map,
1762                             blocks_per_member(map)/IMSM_4K_DIV);
1763                         map->blocks_per_strip /= IMSM_4K_DIV;
1764                         set_pba_of_lba0(map, pba_of_lba0(map)/IMSM_4K_DIV);
1765                 }
1766         }
1767         if (bbm_log_size) {
1768                 struct bbm_log *log = (void *)mpb +
1769                         __le32_to_cpu(mpb->mpb_size) - bbm_log_size;
1770                 __u32 i;
1771
1772                 for (i = 0; i < log->entry_count; i++) {
1773                         struct bbm_log_entry *entry =
1774                                 &log->marked_block_entries[i];
1775
1776                         __u8 count = entry->marked_count + 1;
1777                         unsigned long long sector =
1778                                 __le48_to_cpu(&entry->defective_block_start);
1779
1780                         entry->defective_block_start =
1781                                 __cpu_to_le48(sector/IMSM_4K_DIV);
1782                         entry->marked_count = max(count/IMSM_4K_DIV, 1) - 1;
1783                 }
1784         }
1785
1786         mpb->check_sum = __gen_imsm_checksum(mpb);
1787 }
1788
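/* dump the migration record of the first volume undergoing general
 * migration; only the first two disks in the array carry a valid copy
 */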
1789 void examine_migr_rec_imsm(struct intel_super *super)
1790 {
1791         struct migr_record *migr_rec = super->migr_rec;
1792         struct imsm_super *mpb = super->anchor;
1793         int i;
1794
1795         for (i = 0; i < mpb->num_raid_devs; i++) {
1796                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1797                 struct imsm_map *map;
1798                 int slot = -1;
1799
1800                 if (is_gen_migration(dev) == 0)
1801                         continue;
1802
1803                 printf("\nMigration Record Information:");
1804
1805                 /* first map under migration */
1806                 map = get_imsm_map(dev, MAP_0);
1807                 if (map)
1808                         slot = get_imsm_disk_slot(map, super->disks->index);
1809                 if (map == NULL || slot > 1 || slot < 0) {
1810                         printf(" Empty\n                              ");
1811                         printf("Examine one of first two disks in array\n");
1812                         break;
1813                 }
1814                 printf("\n                     Status : ");
1815                 if (__le32_to_cpu(migr_rec->rec_status) == UNIT_SRC_NORMAL)
1816                         printf("Normal\n");
1817                 else
1818                         printf("Contains Data\n");
1819                 printf("               Current Unit : %llu\n",
1820                        current_migr_unit(migr_rec));
1821                 printf("                     Family : %u\n",
1822                        __le32_to_cpu(migr_rec->family_num));
1823                 printf("                  Ascending : %u\n",
1824                        __le32_to_cpu(migr_rec->ascending_migr));
1825                 printf("            Blocks Per Unit : %u\n",
1826                        __le32_to_cpu(migr_rec->blocks_per_unit));
1827                 printf("       Dest. Depth Per Unit : %u\n",
1828                        __le32_to_cpu(migr_rec->dest_depth_per_unit));
1829                 printf("        Checkpoint Area pba : %llu\n",
1830                        migr_chkp_area_pba(migr_rec));
1831                 printf("           First member lba : %llu\n",
1832                        migr_dest_1st_member_lba(migr_rec));
1833                 printf("      Total Number of Units : %llu\n",
1834                        get_num_migr_units(migr_rec));
1835                 printf("             Size of volume : %llu\n",
1836                        join_u32(migr_rec->post_migr_vol_cap,
1837                                 migr_rec->post_migr_vol_cap_hi));
1838                 printf("       Record was read from : %u\n",
1839                        __le32_to_cpu(migr_rec->ckpt_read_disk_num));
1840
1841                 break;
1842         }
1843 }
1844
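/* inverse of convert_to_4k_imsm_migr_rec(): scale the migration record
 * back up to 512-byte-sector units
 */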
1845 void convert_from_4k_imsm_migr_rec(struct intel_super *super)
1846 {
1847         struct migr_record *migr_rec = super->migr_rec;
1848
1849         migr_rec->blocks_per_unit *= IMSM_4K_DIV;
1850         migr_rec->dest_depth_per_unit *= IMSM_4K_DIV;
1851         split_ull((join_u32(migr_rec->post_migr_vol_cap,
1852                  migr_rec->post_migr_vol_cap_hi) * IMSM_4K_DIV),
1853                  &migr_rec->post_migr_vol_cap,
1854                  &migr_rec->post_migr_vol_cap_hi);
1855         set_migr_chkp_area_pba(migr_rec,
1856                  migr_chkp_area_pba(migr_rec) * IMSM_4K_DIV);
1857         set_migr_dest_1st_member_lba(migr_rec,
1858                  migr_dest_1st_member_lba(migr_rec) * IMSM_4K_DIV);
1859 }
1860
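/* inverse of convert_to_4k(): scale the anchor back up to 512-byte-sector
 * units and regenerate the checksum
 */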
1861 void convert_from_4k(struct intel_super *super)
1862 {
1863         struct imsm_super *mpb = super->anchor;
1864         struct imsm_disk *disk;
1865         int i;
1866         __u32 bbm_log_size = __le32_to_cpu(mpb->bbm_log_size);
1867
1868         for (i = 0; i < mpb->num_disks ; i++) {
1869                 disk = __get_imsm_disk(mpb, i);
1870                 /* disk */
1871                 set_total_blocks(disk, (total_blocks(disk)*IMSM_4K_DIV));
1872         }
1873
1874         for (i = 0; i < mpb->num_raid_devs; i++) {
1875                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
1876                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
1877                 /* dev */
1878                 set_imsm_dev_size(dev, imsm_dev_size(dev)*IMSM_4K_DIV);
1879                 dev->vol.curr_migr_unit *= IMSM_4K_DIV;
1880
1881                 /* map0 */
1882                 set_blocks_per_member(map, blocks_per_member(map)*IMSM_4K_DIV);
1883                 map->blocks_per_strip *= IMSM_4K_DIV;
1884                 set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);
1885
1886                 if (dev->vol.migr_state) {
1887                         /* map1 */
1888                         map = get_imsm_map(dev, MAP_1);
1889                         set_blocks_per_member(map,
1890                             blocks_per_member(map)*IMSM_4K_DIV);
1891                         map->blocks_per_strip *= IMSM_4K_DIV;
1892                         set_pba_of_lba0(map, pba_of_lba0(map)*IMSM_4K_DIV);
1893                 }
1894         }
1895         if (bbm_log_size) {
1896                 struct bbm_log *log = (void *)mpb +
1897                         __le32_to_cpu(mpb->mpb_size) - bbm_log_size;
1898                 __u32 i;
1899
1900                 for (i = 0; i < log->entry_count; i++) {
1901                         struct bbm_log_entry *entry =
1902                                 &log->marked_block_entries[i];
1903
1904                         __u8 count = entry->marked_count + 1;
1905                         unsigned long long sector =
1906                                 __le48_to_cpu(&entry->defective_block_start);
1907
1908                         entry->defective_block_start =
1909                                 __cpu_to_le48(sector*IMSM_4K_DIV);
1910                         entry->marked_count = count*IMSM_4K_DIV - 1;
1911                 }
1912         }
1913
1914         mpb->check_sum = __gen_imsm_checksum(mpb);
1915 }
1916
1917 /*******************************************************************************
1918  * function: imsm_check_attributes
1919  * Description: Checks whether the features represented by the attribute
1920  *              flags are supported by mdadm.
1921  * Parameters:
1922  *              attributes - attributes read from metadata
1923  * Returns:
1924  *              0 - passed attributes contain unsupported feature flags
1925  *              1 - all features are supported
1926  ******************************************************************************/
1927 static int imsm_check_attributes(__u32 attributes)
1928 {
1929         int ret_val = 1;
1930         __u32 not_supported = MPB_ATTRIB_SUPPORTED ^ 0xffffffff;
1931
1932         not_supported &= ~MPB_ATTRIB_IGNORED;
1933
1934         not_supported &= attributes;
1935         if (not_supported) {
1936                 pr_err("(IMSM): Unsupported attributes : %x\n",
1937                         (unsigned)__le32_to_cpu(not_supported));
1938                 if (not_supported & MPB_ATTRIB_CHECKSUM_VERIFY) {
1939                         dprintf("\t\tMPB_ATTRIB_CHECKSUM_VERIFY (== MPB_ATTRIB_LEGACY)\n");
1940                         not_supported ^= MPB_ATTRIB_CHECKSUM_VERIFY;
1941                 }
1942                 if (not_supported & MPB_ATTRIB_2TB) {
1943                         dprintf("\t\tMPB_ATTRIB_2TB\n");
1944                         not_supported ^= MPB_ATTRIB_2TB;
1945                 }
1946                 if (not_supported & MPB_ATTRIB_RAID0) {
1947                         dprintf("\t\tMPB_ATTRIB_RAID0\n");
1948                         not_supported ^= MPB_ATTRIB_RAID0;
1949                 }
1950                 if (not_supported & MPB_ATTRIB_RAID1) {
1951                         dprintf("\t\tMPB_ATTRIB_RAID1\n");
1952                         not_supported ^= MPB_ATTRIB_RAID1;
1953                 }
1954                 if (not_supported & MPB_ATTRIB_RAID10) {
1955                         dprintf("\t\tMPB_ATTRIB_RAID10\n");
1956                         not_supported ^= MPB_ATTRIB_RAID10;
1957                 }
1958                 if (not_supported & MPB_ATTRIB_RAID1E) {
1959                         dprintf("\t\tMPB_ATTRIB_RAID1E\n");
1960                         not_supported ^= MPB_ATTRIB_RAID1E;
1961                 }
1962                 if (not_supported & MPB_ATTRIB_RAID5) {
1963                         dprintf("\t\tMPB_ATTRIB_RAID5\n");
1964                         not_supported ^= MPB_ATTRIB_RAID5;
1965                 }
1966                 if (not_supported & MPB_ATTRIB_RAIDCNG) {
1967                         dprintf("\t\tMPB_ATTRIB_RAIDCNG\n");
1968                         not_supported ^= MPB_ATTRIB_RAIDCNG;
1969                 }
1970                 if (not_supported & MPB_ATTRIB_BBM) {
1971                         dprintf("\t\tMPB_ATTRIB_BBM\n");
1972                         not_supported ^= MPB_ATTRIB_BBM;
1973                 }
1978                 if (not_supported & MPB_ATTRIB_EXP_STRIPE_SIZE) {
1979                         dprintf("\t\tMPB_ATTRIB_EXP_STRIP_SIZE\n");
1980                         not_supported ^= MPB_ATTRIB_EXP_STRIPE_SIZE;
1981                 }
1982                 if (not_supported & MPB_ATTRIB_2TB_DISK) {
1983                         dprintf("\t\tMPB_ATTRIB_2TB_DISK\n");
1984                         not_supported ^= MPB_ATTRIB_2TB_DISK;
1985                 }
1986                 if (not_supported & MPB_ATTRIB_NEVER_USE2) {
1987                         dprintf("\t\tMPB_ATTRIB_NEVER_USE2\n");
1988                         not_supported ^= MPB_ATTRIB_NEVER_USE2;
1989                 }
1990                 if (not_supported & MPB_ATTRIB_NEVER_USE) {
1991                         dprintf("\t\tMPB_ATTRIB_NEVER_USE\n");
1992                         not_supported ^= MPB_ATTRIB_NEVER_USE;
1993                 }
1994
1995                 if (not_supported)
1996                         dprintf("(IMSM): Unknown attributes : %x\n", (unsigned)__le32_to_cpu(not_supported));
1997
1998                 ret_val = 0;
1999         }
2000
2001         return ret_val;
2002 }
2003
2004 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map);
2005
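/* --examine for a container device: print the anchor, each volume, each
 * member disk, any unattached spares and finally any migration record
 */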
2006 static void examine_super_imsm(struct supertype *st, char *homehost)
2007 {
2008         struct intel_super *super = st->sb;
2009         struct imsm_super *mpb = super->anchor;
2010         char str[MAX_SIGNATURE_LENGTH];
2011         int i;
2012         struct mdinfo info;
2013         char nbuf[64];
2014         __u32 sum;
2015         __u32 reserved = imsm_reserved_sectors(super, super->disks);
2016         struct dl *dl;
2017
2018         strncpy(str, (char *)mpb->sig, MPB_SIG_LEN);
2019         str[MPB_SIG_LEN-1] = '\0';
2020         printf("          Magic : %s\n", str);
2021         printf("        Version : %s\n", get_imsm_version(mpb));
2022         printf("    Orig Family : %08x\n", __le32_to_cpu(mpb->orig_family_num));
2023         printf("         Family : %08x\n", __le32_to_cpu(mpb->family_num));
2024         printf("     Generation : %08x\n", __le32_to_cpu(mpb->generation_num));
2025         printf("     Attributes : ");
2026         if (imsm_check_attributes(mpb->attributes))
2027                 printf("All supported\n");
2028         else
2029                 printf("not supported\n");
2030         getinfo_super_imsm(st, &info, NULL);
2031         fname_from_uuid(st, &info, nbuf, ':');
2032         printf("           UUID : %s\n", nbuf + 5);
2033         sum = __le32_to_cpu(mpb->check_sum);
2034         printf("       Checksum : %08x %s\n", sum,
2035                 __gen_imsm_checksum(mpb) == sum ? "correct" : "incorrect");
2036         printf("    MPB Sectors : %d\n", mpb_sectors(mpb, super->sector_size));
2037         printf("          Disks : %d\n", mpb->num_disks);
2038         printf("   RAID Devices : %d\n", mpb->num_raid_devs);
2039         print_imsm_disk(__get_imsm_disk(mpb, super->disks->index),
2040                         super->disks->index, reserved, super->sector_size);
2041         if (get_imsm_bbm_log_size(super->bbm_log)) {
2042                 struct bbm_log *log = super->bbm_log;
2043
2044                 printf("\n");
2045                 printf("Bad Block Management Log:\n");
2046                 printf("       Log Size : %d\n", __le32_to_cpu(mpb->bbm_log_size));
2047                 printf("      Signature : %x\n", __le32_to_cpu(log->signature));
2048                 printf("    Entry Count : %d\n", __le32_to_cpu(log->entry_count));
2049         }
2050         for (i = 0; i < mpb->num_raid_devs; i++) {
2051                 struct mdinfo info;
2052                 struct imsm_dev *dev = __get_imsm_dev(mpb, i);
2053
2054                 super->current_vol = i;
2055                 getinfo_super_imsm(st, &info, NULL);
2056                 fname_from_uuid(st, &info, nbuf, ':');
2057                 print_imsm_dev(super, dev, nbuf + 5, super->disks->index);
2058         }
2059         for (i = 0; i < mpb->num_disks; i++) {
2060                 if (i == super->disks->index)
2061                         continue;
2062                 print_imsm_disk(__get_imsm_disk(mpb, i), i, reserved,
2063                                 super->sector_size);
2064         }
2065
2066         for (dl = super->disks; dl; dl = dl->next)
2067                 if (dl->index == -1)
2068                         print_imsm_disk(&dl->disk, -1, reserved,
2069                                         super->sector_size);
2070
2071         examine_migr_rec_imsm(super);
2072 }
2073
2074 static void brief_examine_super_imsm(struct supertype *st, int verbose)
2075 {
2076         /* We just write a generic IMSM ARRAY entry */
2077         struct mdinfo info;
2078         char nbuf[64];
2079         struct intel_super *super = st->sb;
2080
2081         if (!super->anchor->num_raid_devs) {
2082                 printf("ARRAY metadata=imsm\n");
2083                 return;
2084         }
2085
2086         getinfo_super_imsm(st, &info, NULL);
2087         fname_from_uuid(st, &info, nbuf, ':');
2088         printf("ARRAY metadata=imsm UUID=%s\n", nbuf + 5);
2089 }
2090
2091 static void brief_examine_subarrays_imsm(struct supertype *st, int verbose)
2092 {
2093         /* We write one ARRAY entry per subarray in this container */
2094         struct mdinfo info;
2095         char nbuf[64];
2096         char nbuf1[64];
2097         struct intel_super *super = st->sb;
2098         int i;
2099
2100         if (!super->anchor->num_raid_devs)
2101                 return;
2102
2103         getinfo_super_imsm(st, &info, NULL);
2104         fname_from_uuid(st, &info, nbuf, ':');
2105         for (i = 0; i < super->anchor->num_raid_devs; i++) {
2106                 struct imsm_dev *dev = get_imsm_dev(super, i);
2107
2108                 super->current_vol = i;
2109                 getinfo_super_imsm(st, &info, NULL);
2110                 fname_from_uuid(st, &info, nbuf1, ':');
2111                 printf("ARRAY /dev/md/%.16s container=%s member=%d UUID=%s\n",
2112                        dev->volume, nbuf + 5, i, nbuf1 + 5);
2113         }
2114 }
2115
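/* emit key=value pairs describing the container for --examine --export */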
2116 static void export_examine_super_imsm(struct supertype *st)
2117 {
2118         struct intel_super *super = st->sb;
2119         struct imsm_super *mpb = super->anchor;
2120         struct mdinfo info;
2121         char nbuf[64];
2122
2123         getinfo_super_imsm(st, &info, NULL);
2124         fname_from_uuid(st, &info, nbuf, ':');
2125         printf("MD_METADATA=imsm\n");
2126         printf("MD_LEVEL=container\n");
2127         printf("MD_UUID=%s\n", nbuf + 5);
2128         printf("MD_DEVICES=%u\n", mpb->num_disks);
2129 }
2130
2131 static int copy_metadata_imsm(struct supertype *st, int from, int to)
2132 {
2133         /* The second-to-last sector of the device contains
2134          * the "struct imsm_super" metadata.
2135          * This contains mpb_size which is the size in bytes of the
2136          * extended metadata.  This is located immediately before
2137          * the imsm_super.
2138          * We want to read all that, plus the last sector which
2139          * may contain a migration record, and write it all
2140          * to the target.
2141          */
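        /* end-of-device layout implied by the above:
         *
         *   | ... | extended mpb | imsm_super anchor | migration record |
         *                          ^ 2nd-to-last sector  ^ last sector
         */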
2142         void *buf;
2143         unsigned long long dsize, offset;
2144         int sectors;
2145         struct imsm_super *sb;
2146         struct intel_super *super = st->sb;
2147         unsigned int sector_size = super->sector_size;
2148         unsigned int written = 0;
2149
2150         if (posix_memalign(&buf, MAX_SECTOR_SIZE, MAX_SECTOR_SIZE) != 0)
2151                 return 1;
2152
2153         if (!get_dev_size(from, NULL, &dsize))
2154                 goto err;
2155
2156         if (lseek64(from, dsize-(2*sector_size), 0) < 0)
2157                 goto err;
2158         if ((unsigned int)read(from, buf, sector_size) != sector_size)
2159                 goto err;
2160         sb = buf;
2161         if (strncmp((char*)sb->sig, MPB_SIGNATURE, MPB_SIG_LEN) != 0)
2162                 goto err;
2163
2164         sectors = mpb_sectors(sb, sector_size) + 2;
2165         offset = dsize - sectors * sector_size;
2166         if (lseek64(from, offset, 0) < 0 ||
2167             lseek64(to, offset, 0) < 0)
2168                 goto err;
2169         while (written < sectors * sector_size) {
2170                 int n = sectors*sector_size - written;
2171                 if (n > 4096)
2172                         n = 4096;
2173                 if (read(from, buf, n) != n)
2174                         goto err;
2175                 if (write(to, buf, n) != n)
2176                         goto err;
2177                 written += n;
2178         }
2179         free(buf);
2180         return 0;
2181 err:
2182         free(buf);
2183         return 1;
2184 }
2185
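/* print the extra --detail line (UUID) for a container or, when subarray
 * is given, for that volume
 */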
2186 static void detail_super_imsm(struct supertype *st, char *homehost,
2187                               char *subarray)
2188 {
2189         struct mdinfo info;
2190         char nbuf[64];
2191         struct intel_super *super = st->sb;
2192         int temp_vol = super->current_vol;
2193
2194         if (subarray)
2195                 super->current_vol = strtoul(subarray, NULL, 10);
2196
2197         getinfo_super_imsm(st, &info, NULL);
2198         fname_from_uuid(st, &info, nbuf, ':');
2199         printf("\n              UUID : %s\n", nbuf + 5);
2200
2201         super->current_vol = temp_vol;
2202 }
2203
2204 static void brief_detail_super_imsm(struct supertype *st, char *subarray)
2205 {
2206         struct mdinfo info;
2207         char nbuf[64];
2208         struct intel_super *super = st->sb;
2209         int temp_vol = super->current_vol;
2210
2211         if (subarray)
2212                 super->current_vol = strtoul(subarray, NULL, 10);
2213
2214         getinfo_super_imsm(st, &info, NULL);
2215         fname_from_uuid(st, &info, nbuf, ':');
2216         printf(" UUID=%s", nbuf + 5);
2217
2218         super->current_vol = temp_vol;
2219 }
2220
2221 static int imsm_read_serial(int fd, char *devname, __u8 *serial,
2222                             size_t serial_buf_len);
2223 static void fd2devname(int fd, char *name);
2224
2225 static int ahci_enumerate_ports(const char *hba_path, int port_count, int host_base, int verbose)
2226 {
2227         /* dump an unsorted list of devices attached to AHCI Intel storage
2228          * controller, as well as non-connected ports
2229          */
2230         int hba_len = strlen(hba_path) + 1;
2231         struct dirent *ent;
2232         DIR *dir;
2233         char *path = NULL;
2234         int err = 0;
2235         unsigned long port_mask;
2236         if (port_count < 0 || port_count >= (int)sizeof(port_mask) * 8) {
2237                 if (verbose > 0)
2238                         pr_err("port_count %d out of range\n", port_count);
2239                 return 2;
2240         }
2241         port_mask = (1UL << port_count) - 1;
2242
2243         /* scroll through /sys/dev/block looking for devices attached to
2244          * this hba
2245          */
2246         dir = opendir("/sys/dev/block");
2247         if (!dir)
2248                 return 1;
2249
2250         for (ent = readdir(dir); ent; ent = readdir(dir)) {
2251                 int fd;
2252                 char model[64];
2253                 char vendor[64];
2254                 char buf[1024];
2255                 int major, minor;
2256                 char *device;
2257                 char *c;
2258                 int port;
2259                 int type;
2260
2261                 if (sscanf(ent->d_name, "%d:%d", &major, &minor) != 2)
2262                         continue;
2263                 path = devt_to_devpath(makedev(major, minor));
2264                 if (!path)
2265                         continue;
2266                 if (!path_attached_to_hba(path, hba_path)) {
2267                         free(path);
2268                         path = NULL;
2269                         continue;
2270                 }
2271
2272                 /* retrieve the scsi device type */
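                /* the 'xxxxxxx' placeholder merely sizes the allocation for
                 * the longest attribute path written below via sprintf()
                 */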
2273                 if (asprintf(&device, "/sys/dev/block/%d:%d/device/xxxxxxx", major, minor) < 0) {
2274                         if (verbose > 0)
2275                                 pr_err("failed to allocate 'device'\n");
2276                         err = 2;
2277                         break;
2278                 }
2279                 sprintf(device, "/sys/dev/block/%d:%d/device/type", major, minor);
2280                 if (load_sys(device, buf, sizeof(buf)) != 0) {
2281                         if (verbose > 0)
2282                                 pr_err("failed to read device type for %s\n",
2283                                         path);
2284                         err = 2;
2285                         free(device);
2286                         break;
2287                 }
2288                 type = strtoul(buf, NULL, 10);
2289
2290                 /* if it's not a disk print the vendor and model */
2291                 if (!(type == 0 || type == 7 || type == 14)) {
2292                         vendor[0] = '\0';
2293                         model[0] = '\0';
2294                         sprintf(device, "/sys/dev/block/%d:%d/device/vendor", major, minor);
2295                         if (load_sys(device, buf, sizeof(buf)) == 0) {
2296                                 strncpy(vendor, buf, sizeof(vendor));
2297                                 vendor[sizeof(vendor) - 1] = '\0';
2298                                 c = (char *) &vendor[sizeof(vendor) - 1];
2299                                 while (c >= vendor && (isspace(*c) || *c == '\0'))
2300                                         *c-- = '\0';
2301
2302                         }
2303                         sprintf(device, "/sys/dev/block/%d:%d/device/model", major, minor);
2304                         if (load_sys(device, buf, sizeof(buf)) == 0) {
2305                                 strncpy(model, buf, sizeof(model));
2306                                 model[sizeof(model) - 1] = '\0';
2307                                 c = (char *) &model[sizeof(model) - 1];
2308                                 while (c >= model && (isspace(*c) || *c == '\0'))
2309                                         *c-- = '\0';
2310                         }
2311
2312                         if (vendor[0] && model[0])
2313                                 sprintf(buf, "%.64s %.64s", vendor, model);
2314                         else
2315                                 switch (type) { /* numbers from hald/linux/device.c */
2316                                 case 1: sprintf(buf, "tape"); break;
2317                                 case 2: sprintf(buf, "printer"); break;
2318                                 case 3: sprintf(buf, "processor"); break;
2319                                 case 4:
2320                                 case 5: sprintf(buf, "cdrom"); break;
2321                                 case 6: sprintf(buf, "scanner"); break;
2322                                 case 8: sprintf(buf, "media_changer"); break;
2323                                 case 9: sprintf(buf, "comm"); break;
2324                                 case 12: sprintf(buf, "raid"); break;
2325                                 default: sprintf(buf, "unknown");
2326                                 }
2327                 } else
2328                         buf[0] = '\0';
2329                 free(device);
2330
2331                 /* chop device path to 'host%d' and calculate the port number */
2332                 c = strchr(&path[hba_len], '/');
2333                 if (!c) {
2334                         if (verbose > 0)
2335                                 pr_err("%s - invalid path name\n", path + hba_len);
2336                         err = 2;
2337                         break;
2338                 }
2339                 *c = '\0';
2340                 if ((sscanf(&path[hba_len], "ata%d", &port) == 1) ||
2341                    ((sscanf(&path[hba_len], "host%d", &port) == 1)))
2342                         port -= host_base;
2343                 else {
2344                         if (verbose > 0) {
2345                                 *c = '/'; /* repair the full string */
2346                                 pr_err("failed to determine port number for %s\n",
2347                                         path);
2348                         }
2349                         err = 2;
2350                         break;
2351                 }
2352
2353                 /* mark this port as used */
2354                 port_mask &= ~(1UL << port);
2355
2356                 /* print out the device information */
2357                 if (buf[0]) {
2358                         printf("          Port%d : - non-disk device (%s) -\n", port, buf);
2359                         continue;
2360                 }
2361
2362                 fd = dev_open(ent->d_name, O_RDONLY);
2363                 if (fd < 0)
2364                         printf("          Port%d : - disk info unavailable -\n", port);
2365                 else {
2366                         fd2devname(fd, buf);
2367                         printf("          Port%d : %s", port, buf);
2368                         if (imsm_read_serial(fd, NULL, (__u8 *)buf,
2369                                              sizeof(buf)) == 0)
2370                                 printf(" (%s)\n", buf);
2371                         else
2372                                 printf(" ()\n");
2373                         close(fd);
2374                 }
2375                 free(path);
2376                 path = NULL;
2377         }
2378         if (path)
2379                 free(path);
2380         if (dir)
2381                 closedir(dir);
2382         if (err == 0) {
2383                 int i;
2384
2385                 for (i = 0; i < port_count; i++)
2386                         if (port_mask & (1UL << i))
2387                                 printf("          Port%d : - no device attached -\n", i);
2388         }
2389
2390         return err;
2391 }
2392
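/* list the NVMe block devices attached to the given hba (a VMD domain or
 * a bare NVMe controller) together with their serial numbers
 */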
2393 static int print_nvme_info(struct sys_dev *hba)
2394 {
2395         char buf[1024];
2396         struct dirent *ent;
2397         DIR *dir;
2398         char *rp;
2399         int fd;
2400
2401         dir = opendir("/sys/block/");
2402         if (!dir)
2403                 return 1;
2404
2405         for (ent = readdir(dir); ent; ent = readdir(dir)) {
2406                 if (strstr(ent->d_name, "nvme")) {
2407                         sprintf(buf, "/sys/block/%s", ent->d_name);
2408                         rp = realpath(buf, NULL);
2409                         if (!rp)
2410                                 continue;
2411                         if (path_attached_to_hba(rp, hba->path)) {
2412                                 fd = open_dev(ent->d_name);
2413                                 if (fd < 0) {
2414                                         free(rp);
2415                                         continue;
2416                                 }
2417
2418                                 fd2devname(fd, buf);
2419                                 if (hba->type == SYS_DEV_VMD)
2420                                         printf(" NVMe under VMD : %s", buf);
2421                                 else if (hba->type == SYS_DEV_NVME)
2422                                         printf("    NVMe Device : %s", buf);
2423                                 if (!imsm_read_serial(fd, NULL, (__u8 *)buf,
2424                                                       sizeof(buf)))
2425                                         printf(" (%s)\n", buf);
2426                                 else
2427                                         printf(" ()\n");
2428                                 close(fd);
2429                         }
2430                         free(rp);
2431                 }
2432         }
2433
2434         closedir(dir);
2435         return 0;
2436 }
2437
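/* report each detected Intel(R) controller and its type on stderr */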
2438 static void print_found_intel_controllers(struct sys_dev *elem)
2439 {
2440         for (; elem; elem = elem->next) {
2441                 pr_err("found Intel(R) ");
2442                 if (elem->type == SYS_DEV_SATA)
2443                         fprintf(stderr, "SATA ");
2444                 else if (elem->type == SYS_DEV_SAS)
2445                         fprintf(stderr, "SAS ");
2446                 else if (elem->type == SYS_DEV_NVME)
2447                         fprintf(stderr, "NVMe ");
2448
2449                 if (elem->type == SYS_DEV_VMD)
2450                         fprintf(stderr, "VMD domain");
2451                 else
2452                         fprintf(stderr, "RAID controller");
2453
2454                 if (elem->pci_id)
2455                         fprintf(stderr, " at %s", elem->pci_id);
2456                 fprintf(stderr, ".\n");
2457         }
2458         fflush(stderr);
2459 }
2460
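/* count ata%d/host%d entries under an AHCI controller's sysfs path;
 * returns the lowest host number seen (host_base), or -1 on error
 */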
2461 static int ahci_get_port_count(const char *hba_path, int *port_count)
2462 {
2463         struct dirent *ent;
2464         DIR *dir;
2465         int host_base = -1;
2466
2467         *port_count = 0;
2468         if ((dir = opendir(hba_path)) == NULL)
2469                 return -1;
2470
2471         for (ent = readdir(dir); ent; ent = readdir(dir)) {
2472                 int host;
2473
2474                 if ((sscanf(ent->d_name, "ata%d", &host) != 1) &&
2475                    ((sscanf(ent->d_name, "host%d", &host) != 1)))
2476                         continue;
2477                 if (*port_count == 0)
2478                         host_base = host;
2479                 else if (host < host_base)
2480                         host_base = host;
2481
2482                 if (host + 1 > *port_count + host_base)
2483                         *port_count = host + 1 - host_base;
2484         }
2485         closedir(dir);
2486         return host_base;
2487 }
2488
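/* describe an option-rom's capabilities: product name, version, raid
 * levels, chunk sizes and platform limits
 */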
2489 static void print_imsm_capability(const struct imsm_orom *orom)
2490 {
2491         printf("       Platform : Intel(R) ");
2492         if (orom->capabilities == 0 && orom->driver_features == 0)
2493                 printf("Matrix Storage Manager\n");
2494         else if (imsm_orom_is_enterprise(orom) && orom->major_ver >= 6)
2495                 printf("Virtual RAID on CPU\n");
2496         else
2497                 printf("Rapid Storage Technology%s\n",
2498                         imsm_orom_is_enterprise(orom) ? " enterprise" : "");
2499         if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build)
2500                 printf("        Version : %d.%d.%d.%d\n", orom->major_ver,
2501                                 orom->minor_ver, orom->hotfix_ver, orom->build);
2502         printf("    RAID Levels :%s%s%s%s%s\n",
2503                imsm_orom_has_raid0(orom) ? " raid0" : "",
2504                imsm_orom_has_raid1(orom) ? " raid1" : "",
2505                imsm_orom_has_raid1e(orom) ? " raid1e" : "",
2506                imsm_orom_has_raid10(orom) ? " raid10" : "",
2507                imsm_orom_has_raid5(orom) ? " raid5" : "");
2508         printf("    Chunk Sizes :%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2509                imsm_orom_has_chunk(orom, 2) ? " 2k" : "",
2510                imsm_orom_has_chunk(orom, 4) ? " 4k" : "",
2511                imsm_orom_has_chunk(orom, 8) ? " 8k" : "",
2512                imsm_orom_has_chunk(orom, 16) ? " 16k" : "",
2513                imsm_orom_has_chunk(orom, 32) ? " 32k" : "",
2514                imsm_orom_has_chunk(orom, 64) ? " 64k" : "",
2515                imsm_orom_has_chunk(orom, 128) ? " 128k" : "",
2516                imsm_orom_has_chunk(orom, 256) ? " 256k" : "",
2517                imsm_orom_has_chunk(orom, 512) ? " 512k" : "",
2518                imsm_orom_has_chunk(orom, 1024*1) ? " 1M" : "",
2519                imsm_orom_has_chunk(orom, 1024*2) ? " 2M" : "",
2520                imsm_orom_has_chunk(orom, 1024*4) ? " 4M" : "",
2521                imsm_orom_has_chunk(orom, 1024*8) ? " 8M" : "",
2522                imsm_orom_has_chunk(orom, 1024*16) ? " 16M" : "",
2523                imsm_orom_has_chunk(orom, 1024*32) ? " 32M" : "",
2524                imsm_orom_has_chunk(orom, 1024*64) ? " 64M" : "");
2525         printf("    2TB volumes :%s supported\n",
2526                (orom->attr & IMSM_OROM_ATTR_2TB) ? "" : " not");
2527         printf("      2TB disks :%s supported\n",
2528                (orom->attr & IMSM_OROM_ATTR_2TB_DISK) ? "" : " not");
2529         printf("      Max Disks : %d\n", orom->tds);
2530         printf("    Max Volumes : %d per array, %d per %s\n",
2531                orom->vpa, orom->vphba,
2532                imsm_orom_is_nvme(orom) ? "platform" : "controller");
2533         return;
2534 }
2535
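/* same data as print_imsm_capability(), but as key=value pairs for
 * --detail-platform --export
 */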
2536 static void print_imsm_capability_export(const struct imsm_orom *orom)
2537 {
2538         printf("MD_FIRMWARE_TYPE=imsm\n");
2539         if (orom->major_ver || orom->minor_ver || orom->hotfix_ver || orom->build)
2540                 printf("IMSM_VERSION=%d.%d.%d.%d\n", orom->major_ver, orom->minor_ver,
2541                                 orom->hotfix_ver, orom->build);
2542         printf("IMSM_SUPPORTED_RAID_LEVELS=%s%s%s%s%s\n",
2543                         imsm_orom_has_raid0(orom) ? "raid0 " : "",
2544                         imsm_orom_has_raid1(orom) ? "raid1 " : "",
2545                         imsm_orom_has_raid1e(orom) ? "raid1e " : "",
2546                         imsm_orom_has_raid5(orom) ? "raid5 " : "",
2547                         imsm_orom_has_raid10(orom) ? "raid10 " : "");
2548         printf("IMSM_SUPPORTED_CHUNK_SIZES=%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2549                         imsm_orom_has_chunk(orom, 2) ? "2k " : "",
2550                         imsm_orom_has_chunk(orom, 4) ? "4k " : "",
2551                         imsm_orom_has_chunk(orom, 8) ? "8k " : "",
2552                         imsm_orom_has_chunk(orom, 16) ? "16k " : "",
2553                         imsm_orom_has_chunk(orom, 32) ? "32k " : "",
2554                         imsm_orom_has_chunk(orom, 64) ? "64k " : "",
2555                         imsm_orom_has_chunk(orom, 128) ? "128k " : "",
2556                         imsm_orom_has_chunk(orom, 256) ? "256k " : "",
2557                         imsm_orom_has_chunk(orom, 512) ? "512k " : "",
2558                         imsm_orom_has_chunk(orom, 1024*1) ? "1M " : "",
2559                         imsm_orom_has_chunk(orom, 1024*2) ? "2M " : "",
2560                         imsm_orom_has_chunk(orom, 1024*4) ? "4M " : "",
2561                         imsm_orom_has_chunk(orom, 1024*8) ? "8M " : "",
2562                         imsm_orom_has_chunk(orom, 1024*16) ? "16M " : "",
2563                         imsm_orom_has_chunk(orom, 1024*32) ? "32M " : "",
2564                         imsm_orom_has_chunk(orom, 1024*64) ? "64M " : "");
2565         printf("IMSM_2TB_VOLUMES=%s\n", (orom->attr & IMSM_OROM_ATTR_2TB) ? "yes" : "no");
2566         printf("IMSM_2TB_DISKS=%s\n", (orom->attr & IMSM_OROM_ATTR_2TB_DISK) ? "yes" : "no");
2567         printf("IMSM_MAX_DISKS=%d\n", orom->tds);
2568         printf("IMSM_MAX_VOLUMES_PER_ARRAY=%d\n", orom->vpa);
2569         printf("IMSM_MAX_VOLUMES_PER_CONTROLLER=%d\n", orom->vphba);
2570 }
2571
2572 static int detail_platform_imsm(int verbose, int enumerate_only, char *controller_path)
2573 {
2574         /* There are two components to imsm platform support, the ahci SATA
2575          * controller and the option-rom.  To find the SATA controller we
2576          * simply look in /sys/bus/pci/drivers/ahci to see if an ahci
2577          * controller with the Intel vendor id is present.  This approach
2578          * allows mdadm to leverage the kernel's ahci detection logic, with the
2579          * caveat that if ahci.ko is not loaded mdadm will not be able to
2580          * detect platform raid capabilities.  The option-rom resides in a
2581          * platform "Adapter ROM".  We scan for its signature to retrieve the
2582          * platform capabilities.  If raid support is disabled in the BIOS the
2583          * option-rom capability structure will not be available.
2584          */
2585         struct sys_dev *list, *hba;
2586         int host_base = 0;
2587         int port_count = 0;
2588         int result = 1;
2589
2590         if (enumerate_only) {
2591                 if (check_env("IMSM_NO_PLATFORM"))
2592                         return 0;
2593                 list = find_intel_devices();
2594                 if (!list)
2595                         return 2;
2596                 for (hba = list; hba; hba = hba->next) {
2597                         if (find_imsm_capability(hba)) {
2598                                 result = 0;
2599                                 break;
2600                         } else
2601                                 result = 2;
2603                 }
2604                 return result;
2605         }
2606
2607         list = find_intel_devices();
2608         if (!list) {
2609                 if (verbose > 0)
2610                         pr_err("no active Intel(R) RAID controller found.\n");
2611                 return 2;
2612         } else if (verbose > 0)
2613                 print_found_intel_controllers(list);
2614
2615         for (hba = list; hba; hba = hba->next) {
2616                 if (controller_path && (compare_paths(hba->path, controller_path) != 0))
2617                         continue;
2618                 if (!find_imsm_capability(hba)) {
2619                         char buf[PATH_MAX];
2620                         pr_err("imsm capabilities not found for controller: %s (type %s)\n",
2621                                   hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path,
2622                                   get_sys_dev_type(hba->type));
2623                         continue;
2624                 }
2625                 result = 0;
2626         }
2627
2628         if (controller_path && result == 1) {
2629                 pr_err("no active Intel(R) RAID controller found under %s\n",
2630                                 controller_path);
2631                 return result;
2632         }
2633
2634         const struct orom_entry *entry;
2635
2636         for (entry = orom_entries; entry; entry = entry->next) {
2637                 if (entry->type == SYS_DEV_VMD) {
2638                         print_imsm_capability(&entry->orom);
2639                         printf(" 3rd party NVMe :%s supported\n",
2640                                imsm_orom_has_tpv_support(&entry->orom) ? "" : " not");
2641                         for (hba = list; hba; hba = hba->next) {
2642                                 if (hba->type == SYS_DEV_VMD) {
2643                                         char buf[PATH_MAX];
2644                                         printf(" I/O Controller : %s (%s)\n",
2645                                                 vmd_domain_to_controller(hba, buf), get_sys_dev_type(hba->type));
2646                                         if (print_nvme_info(hba)) {
2647                                                 if (verbose > 0)
2648                                                         pr_err("failed to get devices attached to VMD domain.\n");
2649                                                 result |= 2;
2650                                         }
2651                                 }
2652                         }
2653                         printf("\n");
2654                         continue;
2655                 }
2656
2657                 print_imsm_capability(&entry->orom);
2658                 if (entry->type == SYS_DEV_NVME) {
2659                         for (hba = list; hba; hba = hba->next) {
2660                                 if (hba->type == SYS_DEV_NVME)
2661                                         print_nvme_info(hba);
2662                         }
2663                         printf("\n");
2664                         continue;
2665                 }
2666
2667                 struct devid_list *devid;
2668                 for (devid = entry->devid_list; devid; devid = devid->next) {
2669                         hba = device_by_id(devid->devid);
2670                         if (!hba)
2671                                 continue;
2672
2673                         printf(" I/O Controller : %s (%s)\n",
2674                                 hba->path, get_sys_dev_type(hba->type));
2675                         if (hba->type == SYS_DEV_SATA) {
2676                                 host_base = ahci_get_port_count(hba->path, &port_count);
2677                                 if (ahci_enumerate_ports(hba->path, port_count, host_base, verbose)) {
2678                                         if (verbose > 0)
2679                                                 pr_err("failed to enumerate ports on SATA controller at %s.\n", hba->pci_id);
2680                                         result |= 2;
2681                                 }
2682                         }
2683                 }
2684                 printf("\n");
2685         }
2686
2687         return result;
2688 }
2689
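/* --detail-platform --export: emit capabilities as key=value pairs;
 * returns 0 if at least one imsm-capable controller was found
 */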
2690 static int export_detail_platform_imsm(int verbose, char *controller_path)
2691 {
2692         struct sys_dev *list, *hba;
2693         int result = 1;
2694
2695         list = find_intel_devices();
2696         if (!list) {
2697                 if (verbose > 0)
2698                         pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_INTEL_DEVICES\n");
2699                 result = 2;
2700                 return result;
2701         }
2702
2703         for (hba = list; hba; hba = hba->next) {
2704                 if (controller_path && (compare_paths(hba->path,controller_path) != 0))
2705                         continue;
2706                 if (!find_imsm_capability(hba)) {
2707                         char buf[PATH_MAX];
2708                         if (verbose > 0)
2709                                 pr_err("IMSM_DETAIL_PLATFORM_ERROR=NO_IMSM_CAPABLE_DEVICE_UNDER_%s\n",
2710                                        hba->type == SYS_DEV_VMD ? vmd_domain_to_controller(hba, buf) : hba->path);
2711                 } else
2712                         result = 0;
2713         }
2714
2715         const struct orom_entry *entry;
2716
2717         for (entry = orom_entries; entry; entry = entry->next) {
2718                 if (entry->type == SYS_DEV_VMD) {
2719                         for (hba = list; hba; hba = hba->next)
2720                                 if (hba->type == SYS_DEV_VMD)
2721                                         print_imsm_capability_export(&entry->orom);
2721                         continue;
2722                 }
2723                 print_imsm_capability_export(&entry->orom);
2724         }
2725
2726         return result;
2727 }
2728
2729 static int match_home_imsm(struct supertype *st, char *homehost)
2730 {
2731         /* the imsm metadata format does not specify any host
2732          * identification information.  We return -1 since we can never
2733          * confirm nor deny whether a given array is "meant" for this
2734          * host.  We rely on compare_super and the 'family_num' fields to
2735          * exclude member disks that do not belong, and we rely on
2736          * mdadm.conf to specify the arrays that should be assembled.
2737          * Auto-assembly may still pick up "foreign" arrays.
2738          */
2739
2740         return -1;
2741 }
2742
2743 static void uuid_from_super_imsm(struct supertype *st, int uuid[4])
2744 {
2745         /* The uuid returned here is used for:
2746          *  uuid to put into bitmap file (Create, Grow)
2747          *  uuid for backup header when saving critical section (Grow)
2748          *  comparing uuids when re-adding a device into an array
2749          *    In these cases the uuid required is that of the data-array,
2750          *    not the device-set.
2751          *  uuid to recognise same set when adding a missing device back
2752          *    to an array.   This is a uuid for the device-set.
2753          *
2754          * For each of these we can make do with a truncated
2755          * or hashed uuid rather than the original, as long as
2756          * everyone agrees.
2757          */
2760         /* imsm does not track uuids, so we synthesize one using sha1 over
2761          * - the signature (which is constant for all imsm arrays, but no matter)
2762          * - the orig_family_num of the container
2763          * - the index number of the volume
2764          * - the 'serial' number of the volume.
2765          * Hopefully these are all constant.
2766          */
2767         struct intel_super *super = st->sb;
2768
2769         char buf[20];
2770         struct sha1_ctx ctx;
2771         struct imsm_dev *dev = NULL;
2772         __u32 family_num;
2773
2774         /* some mdadm versions failed to set ->orig_family_num, in which
2775          * case fall back to ->family_num.  orig_family_num will be
2776          * fixed up with the first metadata update.
2777          */
2778         family_num = super->anchor->orig_family_num;
2779         if (family_num == 0)
2780                 family_num = super->anchor->family_num;
2781         sha1_init_ctx(&ctx);
2782         sha1_process_bytes(super->anchor->sig, MPB_SIG_LEN, &ctx);
2783         sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
2784         if (super->current_vol >= 0)
2785                 dev = get_imsm_dev(super, super->current_vol);
2786         if (dev) {
2787                 __u32 vol = super->current_vol;
2788                 sha1_process_bytes(&vol, sizeof(vol), &ctx);
2789                 sha1_process_bytes(dev->volume, MAX_RAID_SERIAL_LEN, &ctx);
2790         }
2791         sha1_finish_ctx(&ctx, buf);
2792         memcpy(uuid, buf, 4*4);
2793 }
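
/* Illustrative sketch (not part of the driver): the uuid recipe used
 * above, shown standalone.  It hashes the signature, the family number
 * and, when a volume is selected, the volume index and serial, then
 * truncates the 20-byte sha1 digest to the 16 bytes md expects.  The
 * inputs here are hypothetical; only the recipe mirrors the code above.
 */
#if 0
static void example_synth_uuid(const __u8 *sig, __u32 family_num,
                               __u32 vol, const __u8 *serial, int uuid[4])
{
        char buf[20];                   /* sha1 digests are 20 bytes */
        struct sha1_ctx ctx;

        sha1_init_ctx(&ctx);
        sha1_process_bytes(sig, MPB_SIG_LEN, &ctx);
        sha1_process_bytes(&family_num, sizeof(__u32), &ctx);
        sha1_process_bytes(&vol, sizeof(vol), &ctx);
        sha1_process_bytes(serial, MAX_RAID_SERIAL_LEN, &ctx);
        sha1_finish_ctx(&ctx, buf);
        memcpy(uuid, buf, 4*4);         /* keep only the first 16 bytes */
}
#endif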
2794
2795 #if 0
2796 static void
2797 get_imsm_numerical_version(struct imsm_super *mpb, int *m, int *p)
2798 {
2799         __u8 *v = get_imsm_version(mpb);
2800         __u8 *end = mpb->sig + MAX_SIGNATURE_LENGTH;
2801         char major[] = { 0, 0, 0 };
2802         char minor[] = { 0, 0, 0 };
2803         char patch[] = { 0, 0, 0 };
2804         char *ver_parse[] = { major, minor, patch };
2805         int i, j;
2806
2807         i = j = 0;
2808         while (*v != '\0' && v < end) {
2809                 if (*v != '.' && j < 2)
2810                         ver_parse[i][j++] = *v;
2811                 else {
2812                         i++;
2813                         j = 0;
2814                 }
2815                 v++;
2816         }
2817
2818         *m = strtol(minor, NULL, 0);
2819         *p = strtol(patch, NULL, 0);
2820 }
2821 #endif
2822
2823 static __u32 migr_strip_blocks_resync(struct imsm_dev *dev)
2824 {
2825         /* migr_strip_size when repairing or initializing parity */
2826         struct imsm_map *map = get_imsm_map(dev, MAP_0);
2827         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2828
2829         switch (get_imsm_raid_level(map)) {
2830         case 5:
2831         case 10:
2832                 return chunk;
2833         default:
2834                 return 128*1024 >> 9;
2835         }
2836 }
2837
2838 static __u32 migr_strip_blocks_rebuild(struct imsm_dev *dev)
2839 {
2840         /* migr_strip_size when rebuilding a degraded disk; no idea why
2841          * this is different from migr_strip_blocks_resync(), but it's
2842          * good to be compatible
2843          */
2844         struct imsm_map *map = get_imsm_map(dev, MAP_1);
2845         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2846
2847         switch (get_imsm_raid_level(map)) {
2848         case 1:
2849         case 10:
2850                 if (map->num_members % map->num_domains == 0)
2851                         return 128*1024 >> 9;
2852                 else
2853                         return chunk;
2854         case 5:
2855                 return max((__u32) 64*1024 >> 9, chunk);
2856         default:
2857                 return 128*1024 >> 9;
2858         }
2859 }
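
/* Worked example (illustrative numbers): with a 512-block (256K) strip,
 * a RAID5 rebuild uses max(64*1024 >> 9, 512) = 512 blocks per migration
 * strip, while a 2-disk RAID1 (num_members == num_domains == 2) rebuilds
 * in 128*1024 >> 9 = 256-block strips.
 */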
2860
2861 static __u32 num_stripes_per_unit_resync(struct imsm_dev *dev)
2862 {
2863         struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2864         struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2865         __u32 lo_chunk = __le32_to_cpu(lo->blocks_per_strip);
2866         __u32 hi_chunk = __le32_to_cpu(hi->blocks_per_strip);
2867
2868         return max((__u32) 1, hi_chunk / lo_chunk);
2869 }
2870
2871 static __u32 num_stripes_per_unit_rebuild(struct imsm_dev *dev)
2872 {
2873         struct imsm_map *lo = get_imsm_map(dev, MAP_0);
2874         int level = get_imsm_raid_level(lo);
2875
2876         if (level == 1 || level == 10) {
2877                 struct imsm_map *hi = get_imsm_map(dev, MAP_1);
2878
2879                 return hi->num_domains;
2880         } else
2881                 return num_stripes_per_unit_resync(dev);
2882 }
2883
2884 static __u8 imsm_num_data_members(struct imsm_map *map)
2885 {
2886         /* named 'imsm_' because raid0, raid1 and raid10
2887          * counter-intuitively have the same number of data disks
2888          */
2889         switch (get_imsm_raid_level(map)) {
2890         case 0:
2891                 return map->num_members;
2893         case 1:
2894         case 10:
2895                 return map->num_members/2;
2896         case 5:
2897                 return map->num_members - 1;
2898         default:
2899                 dprintf("unsupported raid level\n");
2900                 return 0;
2901         }
2902 }
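
/* For example (illustrative): a 4-member RAID10 map has 4/2 = 2 data
 * disks, a 4-member RAID5 map has 4 - 1 = 3, and a 4-member RAID0 map
 * has all 4.
 */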
2903
2904 static unsigned long long calc_component_size(struct imsm_map *map,
2905                                               struct imsm_dev *dev)
2906 {
2907         unsigned long long component_size;
2908         unsigned long long dev_size = imsm_dev_size(dev);
2909         long long calc_dev_size = 0;
2910         unsigned int member_disks = imsm_num_data_members(map);
2911
2912         if (member_disks == 0)
2913                 return 0;
2914
2915         component_size = per_dev_array_size(map);
2916         calc_dev_size = component_size * member_disks;
2917
2918         /* Component size is rounded to 1MB, so the size from metadata and
2919          * the size calculated from num_data_stripes may differ by up to
2920          * 2048 blocks per device.  A larger difference means the array
2921          * size was expanded and num_data_stripes was not updated.
2922          */
2923         if (llabs(calc_dev_size - (long long)dev_size) >
2924             (1 << SECT_PER_MB_SHIFT) * member_disks) {
2925                 component_size = dev_size / member_disks;
2926                 dprintf("Invalid num_data_stripes in metadata; expected=%llu, found=%llu\n",
2927                         component_size / map->blocks_per_strip,
2928                         num_data_stripes(map));
2929         }
2930
2931         return component_size;
2932 }
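
/* Worked example (illustrative numbers): with 3 data disks the accepted
 * difference is 3 * 2048 = 6144 blocks.  If the sizes disagree by more
 * than that, the volume was grown without updating num_data_stripes, so
 * for dev_size = 6291456 blocks the per-disk size is recomputed as
 * 6291456 / 3 = 2097152 blocks.
 */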
2933
2934 static __u32 parity_segment_depth(struct imsm_dev *dev)
2935 {
2936         struct imsm_map *map = get_imsm_map(dev, MAP_0);
2937         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2938
2939         switch(get_imsm_raid_level(map)) {
2940         case 1:
2941         case 10:
2942                 return chunk * map->num_domains;
2943         case 5:
2944                 return chunk * map->num_members;
2945         default:
2946                 return chunk;
2947         }
2948 }
2949
2950 static __u32 map_migr_block(struct imsm_dev *dev, __u32 block)
2951 {
2952         struct imsm_map *map = get_imsm_map(dev, MAP_1);
2953         __u32 chunk = __le32_to_cpu(map->blocks_per_strip);
2954         __u32 strip = block / chunk;
2955
2956         switch (get_imsm_raid_level(map)) {
2957         case 1:
2958         case 10: {
2959                 __u32 vol_strip = (strip * map->num_domains) + 1;
2960                 __u32 vol_stripe = vol_strip / map->num_members;
2961
2962                 return vol_stripe * chunk + block % chunk;
2963         } case 5: {
2964                 __u32 stripe = strip / (map->num_members - 1);
2965
2966                 return stripe * chunk + block % chunk;
2967         }
2968         default:
2969                 return 0;
2970         }
2971 }
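
/* Worked example (illustrative numbers): RAID5 with 4 members and a
 * 256-block chunk, block = 1000.  strip = 1000/256 = 3; with 4 - 1 = 3
 * data strips per stripe, stripe = 3/3 = 1, so the block maps to
 * 1*256 + 1000%256 = 488.
 */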
2972
2973 static __u64 blocks_per_migr_unit(struct intel_super *super,
2974                                   struct imsm_dev *dev)
2975 {
2976         /* calculate the conversion factor between per member 'blocks'
2977          * (md/{resync,rebuild}_start) and imsm migration units, return
2978          * 0 for the 'not migrating' and 'unsupported migration' cases
2979          */
2980         if (!dev->vol.migr_state)
2981                 return 0;
2982
2983         switch (migr_type(dev)) {
2984         case MIGR_GEN_MIGR: {
2985                 struct migr_record *migr_rec = super->migr_rec;
2986                 return __le32_to_cpu(migr_rec->blocks_per_unit);
2987         }
2988         case MIGR_VERIFY:
2989         case MIGR_REPAIR:
2990         case MIGR_INIT: {
2991                 struct imsm_map *map = get_imsm_map(dev, MAP_0);
2992                 __u32 stripes_per_unit;
2993                 __u32 blocks_per_unit;
2994                 __u32 parity_depth;
2995                 __u32 migr_chunk;
2996                 __u32 block_map;
2997                 __u32 block_rel;
2998                 __u32 segment;
2999                 __u32 stripe;
3000                 __u8  disks;
3001
3002                 /* yes, this is really the translation of migr_units to
3003                  * per-member blocks in the 'resync' case
3004                  */
3005                 stripes_per_unit = num_stripes_per_unit_resync(dev);
3006                 migr_chunk = migr_strip_blocks_resync(dev);
3007                 disks = imsm_num_data_members(map);
3008                 blocks_per_unit = stripes_per_unit * migr_chunk * disks;
3009                 stripe = __le16_to_cpu(map->blocks_per_strip) * disks;
3010                 segment = blocks_per_unit / stripe;
3011                 block_rel = blocks_per_unit - segment * stripe;
3012                 parity_depth = parity_segment_depth(dev);
3013                 block_map = map_migr_block(dev, block_rel);
3014                 return block_map + parity_depth * segment;
3015         }
3016         case MIGR_REBUILD: {
3017                 __u32 stripes_per_unit;
3018                 __u32 migr_chunk;
3019
3020                 stripes_per_unit = num_stripes_per_unit_rebuild(dev);
3021                 migr_chunk = migr_strip_blocks_rebuild(dev);
3022                 return migr_chunk * stripes_per_unit;
3023         }
3024         case MIGR_STATE_CHANGE:
3025         default:
3026                 return 0;
3027         }
3028 }
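
/* Worked example (illustrative numbers): resync of a 4-member RAID5
 * (3 data disks) with a 256-block chunk.  migr_strip_blocks_resync()
 * returns the chunk (256) and num_stripes_per_unit_resync() returns 1,
 * so blocks_per_unit = 1 * 256 * 3 = 768: one full stripe of data per
 * migration unit, before the parity-depth correction applied above.
 */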
3029
3030 static int imsm_level_to_layout(int level)
3031 {
3032         switch (level) {
3033         case 0:
3034         case 1:
3035                 return 0;
3036         case 5:
3037         case 6:
3038                 return ALGORITHM_LEFT_ASYMMETRIC;
3039         case 10:
3040                 return 0x102;
3041         }
3042         return UnSet;
3043 }
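
/* Note: 0x102 is md's raid10 layout word for near_copies = 2,
 * far_copies = 1 ("near=2"), the only raid10 geometry imsm uses.
 */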
3044
3045 /*******************************************************************************
3046  * Function:    read_imsm_migr_rec
3047  * Description: Reads the imsm migration record stored near the end of a disk
3048  * Parameters:
3049  *      fd      : disk descriptor
3050  *      super   : metadata info
3051  * Returns:
3052  *       0 : success,
3053  *      -1 : fail
3054  ******************************************************************************/
3055 static int read_imsm_migr_rec(int fd, struct intel_super *super)
3056 {
3057         int ret_val = -1;
3058         unsigned int sector_size = super->sector_size;
3059         unsigned long long dsize;
3060
3061         get_dev_size(fd, NULL, &dsize);
3062         if (lseek64(fd, dsize - (sector_size*MIGR_REC_SECTOR_POSITION),
3063                    SEEK_SET) < 0) {
3064                 pr_err("Cannot seek to anchor block: %s\n",
3065                        strerror(errno));
3066                 goto out;
3067         }
3068         if ((unsigned int)read(fd, super->migr_rec_buf,
3069             MIGR_REC_BUF_SECTORS*sector_size) !=
3070             MIGR_REC_BUF_SECTORS*sector_size) {
3071                 pr_err("Cannot read migr record block: %s\n",
3072                        strerror(errno));
3073                 goto out;
3074         }
3075         ret_val = 0;
3076         if (sector_size == 4096)
3077                 convert_from_4k_imsm_migr_rec(super);
3078
3079 out:
3080         return ret_val;
3081 }
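
/* Usage sketch (illustrative): loading the record from an already-open
 * member device; 'fd' and 'super' are assumed to be set up by the caller.
 */
#if 0
        if (read_imsm_migr_rec(fd, super) == 0)
                dprintf("curr_migr_unit: %u\n",
                        current_migr_unit(super->migr_rec));
#endif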
3082
3083 static struct imsm_dev *imsm_get_device_during_migration(
3084         struct intel_super *super)
3085 {
3086
3087         struct intel_dev *dv;
3088
3089         for (dv = super->devlist; dv; dv = dv->next) {
3090                 if (is_gen_migration(dv->dev))
3091                         return dv->dev;
3092         }
3093         return NULL;
3094 }
3095
3096 /*******************************************************************************
3097  * Function:    load_imsm_migr_rec
3098  * Description: Reads the imsm migration record (it is stored near the
3099  *              end of the disk)
3100  * Parameters:
3101  *      super   : imsm internal array info
3102  *      info    : general array info
3103  * Returns:
3104  *       0 : success
3105  *      -1 : fail
3106  *      -2 : no migration in progress
3107  ******************************************************************************/
3108 static int load_imsm_migr_rec(struct intel_super *super, struct mdinfo *info)
3109 {
3110         struct mdinfo *sd;
3111         struct dl *dl;
3112         char nm[30];
3113         int retval = -1;
3114         int fd = -1;
3115         struct imsm_dev *dev;
3116         struct imsm_map *map;
3117         int slot = -1;
3118
3119         /* find map under migration */
3120         dev = imsm_get_device_during_migration(super);
3121         /* nothing to load, no migration in progress?
3122          */
3123         if (dev == NULL)
3124                 return -2;
3125
3126         if (info) {
3127                 for (sd = info->devs ; sd ; sd = sd->next) {
3128                         /* read only from one of the first two slots */
3129                         if ((sd->disk.raid_disk < 0) ||
3130                             (sd->disk.raid_disk > 1))
3131                                 continue;
3132
3133                         sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
3134                         fd = dev_open(nm, O_RDONLY);
3135                         if (fd >= 0)
3136                                 break;
3137                 }
3138         }
3139         if (fd < 0) {
3140                 map = get_imsm_map(dev, MAP_0);
3141                 for (dl = super->disks; dl; dl = dl->next) {
3142                         /* skip spare and failed disks
3143                         */
3144                         if (dl->index < 0)
3145                                 continue;
3146                         /* read only from one of the first two slots */
3147                         if (map)
3148                                 slot = get_imsm_disk_slot(map, dl->index);
3149                         if (map == NULL || slot > 1 || slot < 0)
3150                                 continue;
3151                         sprintf(nm, "%d:%d", dl->major, dl->minor);
3152                         fd = dev_open(nm, O_RDONLY);
3153                         if (fd >= 0)
3154                                 break;
3155                 }
3156         }
3157         if (fd < 0)
3158                 goto out;
3159         retval = read_imsm_migr_rec(fd, super);
3160
3161 out:
3162         if (fd >= 0)
3163                 close(fd);
3164         return retval;
3165 }
3166
3167 /*******************************************************************************
3168  * function: imsm_create_metadata_checkpoint_update
3169  * Description: Creates a metadata update for a checkpoint change.
3170  * Parameters:
3171  *      super   : imsm internal array info
3172  *      u       : pointer to prepared update
3173  * Returns:
3174  *      Update length.
3175  *      If length is equal to 0, input pointer u contains no update.
3176  ******************************************************************************/
3177 static int imsm_create_metadata_checkpoint_update(
3178         struct intel_super *super,
3179         struct imsm_update_general_migration_checkpoint **u)
3180 {
3181
3182         int update_memory_size = 0;
3183
3184         dprintf("(enter)\n");
3185
3186         if (u == NULL)
3187                 return 0;
3188         *u = NULL;
3189
3190         /* size of all update data without anchor */
3191         update_memory_size =
3192                 sizeof(struct imsm_update_general_migration_checkpoint);
3193
3194         /* xcalloc() aborts on allocation failure, so there is no need to
3195          * check for NULL here
3196          */
3197         *u = xcalloc(1, update_memory_size);
3199         (*u)->type = update_general_migration_checkpoint;
3200         (*u)->curr_migr_unit = current_migr_unit(super->migr_rec);
3201         dprintf("prepared for %u\n", (*u)->curr_migr_unit);
3202
3203         return update_memory_size;
3204 }
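
/* Usage sketch (illustrative, mirroring write_imsm_migr_rec() below):
 * prepare the update, apply it locally, and free it when there is no
 * mdmon update queue to hand it to.
 */
#if 0
        struct imsm_update_general_migration_checkpoint *u;
        int len = imsm_create_metadata_checkpoint_update(super, &u);

        if (len > 0) {
                imsm_update_metadata_locally(st, u, len);
                free(u);
        }
#endif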
3205
3206 static void imsm_update_metadata_locally(struct supertype *st,
3207                                          void *buf, int len);
3208
3209 /*******************************************************************************
3210  * Function:    write_imsm_migr_rec
3211  * Description: Writes the imsm migration record
3212  *              (near the end of the disk)
3213  * Parameters:
3214  *      super   : imsm internal array info
3215  * Returns:
3216  *       0 : success
3217  *      -1 : fail
3218  ******************************************************************************/
3219 static int write_imsm_migr_rec(struct supertype *st)
3220 {
3221         struct intel_super *super = st->sb;
3222         unsigned int sector_size = super->sector_size;
3223         unsigned long long dsize;
3224         char nm[30];
3225         int fd = -1;
3226         int retval = -1;
3227         struct dl *sd;
3228         int len;
3229         struct imsm_update_general_migration_checkpoint *u;
3230         struct imsm_dev *dev;
3231         struct imsm_map *map;
3232
3233         /* find map under migration */
3234         dev = imsm_get_device_during_migration(super);
3235         /* if there is no migration, write the buffer anyway to clear the
3236          * migration record on disk, based on the first available device
3237          */
3238         if (dev == NULL)
3239                 dev = get_imsm_dev(super, super->current_vol < 0 ? 0 :
3240                                           super->current_vol);
3241
3242         map = get_imsm_map(dev, MAP_0);
3243
3244         if (sector_size == 4096)
3245                 convert_to_4k_imsm_migr_rec(super);
3246         for (sd = super->disks ; sd ; sd = sd->next) {
3247                 int slot = -1;
3248
3249                 /* skip failed and spare devices */
3250                 if (sd->index < 0)
3251                         continue;
3252                 /* write to the first 2 slots only */
3253                 if (map)
3254                         slot = get_imsm_disk_slot(map, sd->index);
3255                 if (map == NULL || slot > 1 || slot < 0)
3256                         continue;
3257
3258                 sprintf(nm, "%d:%d", sd->major, sd->minor);
3259                 fd = dev_open(nm, O_RDWR);
3260                 if (fd < 0)
3261                         continue;
3262                 get_dev_size(fd, NULL, &dsize);
3263                 if (lseek64(fd, dsize - (MIGR_REC_SECTOR_POSITION*sector_size),
3264                     SEEK_SET) < 0) {
3265                         pr_err("Cannot seek to anchor block: %s\n",
3266                                strerror(errno));
3267                         goto out;
3268                 }
3269                 if ((unsigned int)write(fd, super->migr_rec_buf,
3270                     MIGR_REC_BUF_SECTORS*sector_size) !=
3271                     MIGR_REC_BUF_SECTORS*sector_size) {
3272                         pr_err("Cannot write migr record block: %s\n",
3273                                strerror(errno));
3274                         goto out;
3275                 }
3276                 close(fd);
3277                 fd = -1;
3278         }
3279         if (sector_size == 4096)
3280                 convert_from_4k_imsm_migr_rec(super);
3281         /* update checkpoint information in metadata */
3282         len = imsm_create_metadata_checkpoint_update(super, &u);
3283         if (len <= 0) {
3284                 dprintf("imsm: Cannot prepare update\n");
3285                 goto out;
3286         }
3287         /* update metadata locally */
3288         imsm_update_metadata_locally(st, u, len);
3289         /* and possibly remotely */
3290         if (st->update_tail) {
3291                 append_metadata_update(st, u, len);
3292                 /* during reshape we do all the work inside the metadata
3293                  * handler manage_reshape(), so the metadata update has to
3294                  * be triggered inside it
3295                  */
3296                 flush_metadata_updates(st);
3297                 st->update_tail = &st->updates;
3298         } else
3299                 free(u);
3300
3301         retval = 0;
3302  out:
3303         if (fd >= 0)
3304                 close(fd);
3305         return retval;
3306 }
3307
3308 /* spare/missing disk activations are not allowed when the
3309  * array/container performs a reshape operation, because all
3310  * arrays in the container work on the same set of disks
3311  */
3312 int imsm_reshape_blocks_arrays_changes(struct intel_super *super)
3313 {
3314         int rv = 0;
3315         struct intel_dev *i_dev;
3316         struct imsm_dev *dev;
3317
3318         /* check whole container
3319          */
3320         for (i_dev = super->devlist; i_dev; i_dev = i_dev->next) {
3321                 dev = i_dev->dev;
3322                 if (is_gen_migration(dev)) {
3323                         /* No repair during any migration in container
3324                          */
3325                         rv = 1;
3326                         break;
3327                 }
3328         }
3329         return rv;
3330 }

3331 static unsigned long long imsm_component_size_alignment_check(int level,
3332                                               int chunk_size,
3333                                               unsigned int sector_size,
3334                                               unsigned long long component_size)
3335 {
3336         unsigned int component_size_alignment;
3337
3338         /* check component size alignment
3339          */
3340         component_size_alignment = component_size % (chunk_size/sector_size);
3341
3342         dprintf("(Level: %i, chunk_size = %i, component_size = %llu), component_size_alignment = %u\n",
3343                 level, chunk_size, component_size,
3344                 component_size_alignment);
3345
3346         if (component_size_alignment && (level != 1) && (level != UnSet)) {
3347                 dprintf("imsm: reported component size aligned from %llu ",
3348                         component_size);
3349                 component_size -= component_size_alignment;
3350                 dprintf_cont("to %llu (%i).\n",
3351                         component_size, component_size_alignment);
3352         }
3353
3354         return component_size;
3355 }
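
/* Worked example (illustrative numbers): chunk_size = 131072 bytes and
 * 512-byte sectors give 131072/512 = 256 blocks per chunk; a reported
 * component_size of 2097200 blocks has 2097200 % 256 = 48 blocks of
 * misalignment, so for levels other than RAID1 it is trimmed to
 * 2097152 blocks.
 */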
3356
3357 static unsigned long long get_ppl_sector(struct intel_super *super, int dev_idx)
3358 {
3359         struct imsm_dev *dev = get_imsm_dev(super, dev_idx);
3360         struct imsm_map *map = get_imsm_map(dev, MAP_0);
3361
3362         return pba_of_lba0(map) +
3363                (num_data_stripes(map) * map->blocks_per_strip);
3364 }
3365
3366 static void getinfo_super_imsm_volume(struct supertype *st, struct mdinfo *info, char *dmap)
3367 {
3368         struct intel_super *super = st->sb;
3369         struct migr_record *migr_rec = super->migr_rec;
3370         struct imsm_dev *dev = get_imsm_dev(super, super->current_vol);
3371         struct imsm_map *map = get_imsm_map(dev, MAP_0);
3372         struct imsm_map *prev_map = get_imsm_map(dev, MAP_1);
3373         struct imsm_map *map_to_analyse = map;
3374         struct dl *dl;
3375         int map_disks = info->array.raid_disks;
3376
3377         memset(info, 0, sizeof(*info));
3378         if (prev_map)
3379                 map_to_analyse = prev_map;
3380
3381         dl = super->current_disk;
3382
3383         info->container_member    = super->current_vol;
3384         info->array.raid_disks    = map->num_members;
3385         info->array.level         = get_imsm_raid_level(map_to_analyse);
3386         info->array.layout        = imsm_level_to_layout(info->array.level);
3387         info->array.md_minor      = -1;
3388         info->array.ctime         = 0;
3389         info->array.utime         = 0;
3390         info->array.chunk_size    =
3391                 __le16_to_cpu(map_to_analyse->blocks_per_strip) << 9;
3392         info->array.state         = !(dev->vol.dirty & RAIDVOL_DIRTY);
3393         info->custom_array_size   = imsm_dev_size(dev);
3394         info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
3395
3396         if (is_gen_migration(dev)) {
3397                 info->reshape_active = 1;
3398                 info->new_level = get_imsm_raid_level(map);
3399                 info->new_layout = imsm_level_to_layout(info->new_level);
3400                 info->new_chunk = __le16_to_cpu(map->blocks_per_strip) << 9;
3401                 info->delta_disks = map->num_members - prev_map->num_members;
3402                 if (info->delta_disks) {
3403                         /* this needs to be applied to every array
3404                          * in the container.
3405                          */
3406                         info->reshape_active = CONTAINER_RESHAPE;
3407                 }
3408                 /* The shape information that we give to md might have to be
3409                  * modified to cope with md's requirements for reshaping
3410                  * arrays.  For example, when reshaping a RAID0, md requires
3411                  * it to be presented as a degraded RAID4.
3412                  * Also if a RAID0 is migrating to a RAID5 we need to specify
3413                  * the array as already being RAID5, but the 'before' layout
3414                  * is a RAID4-like layout.
3415                  */
3416                 switch (info->array.level) {
3417                 case 0:
3418                         switch(info->new_level) {
3419                         case 0:
3420                                 /* conversion is happening as RAID4 */
3421                                 info->array.level = 4;
3422                                 info->array.raid_disks += 1;
3423                                 break;
3424                         case 5:
3425                                 /* conversion is happening as RAID5 */
3426                                 info->array.level = 5;
3427                                 info->array.layout = ALGORITHM_PARITY_N;
3428                                 info->delta_disks -= 1;
3429                                 break;
3430                         default:
3431                                 /* FIXME error message */
3432                                 info->array.level = UnSet;
3433                                 break;
3434                         }
3435                         break;
3436                 }
3437         } else {
3438                 info->new_level = UnSet;
3439                 info->new_layout = UnSet;
3440                 info->new_chunk = info->array.chunk_size;
3441                 info->delta_disks = 0;
3442         }
3443
3444         if (dl) {
3445                 info->disk.major = dl->major;
3446                 info->disk.minor = dl->minor;
3447                 info->disk.number = dl->index;
3448                 info->disk.raid_disk = get_imsm_disk_slot(map_to_analyse,
3449                                                           dl->index);
3450         }
3451
3452         info->data_offset         = pba_of_lba0(map_to_analyse);
3453         info->component_size = calc_component_size(map, dev);
3454         info->component_size = imsm_component_size_alignment_check(
3455                                                         info->array.level,
3456                                                         info->array.chunk_size,
3457                                                         super->sector_size,
3458                                                         info->component_size);
3459         info->bb.supported = 1;
3460
3461         memset(info->uuid, 0, sizeof(info->uuid));
3462         info->recovery_start = MaxSector;
3463
3464         if (info->array.level == 5 &&
3465             (dev->rwh_policy == RWH_DISTRIBUTED ||
3466              dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)) {
3467                 info->consistency_policy = CONSISTENCY_POLICY_PPL;
3468                 info->ppl_sector = get_ppl_sector(super, super->current_vol);
3469                 if (dev->rwh_policy == RWH_MULTIPLE_DISTRIBUTED)
3470                         info->ppl_size = MULTIPLE_PPL_AREA_SIZE_IMSM >> 9;
3471                 else
3472                         info->ppl_size = (PPL_HEADER_SIZE + PPL_ENTRY_SPACE)
3473                                           >> 9;
3474         } else if (info->array.level <= 0) {
3475                 info->consistency_policy = CONSISTENCY_POLICY_NONE;
3476         } else {
3477                 info->consistency_policy = CONSISTENCY_POLICY_RESYNC;
3478         }
3479
3480         info->reshape_progress = 0;
3481         info->resync_start = MaxSector;
3482         if ((map_to_analyse->map_state == IMSM_T_STATE_UNINITIALIZED ||
3483             !(info->array.state & 1)) &&
3484             imsm_reshape_blocks_arrays_changes(super) == 0) {
3485                 info->resync_start = 0;
3486         }
3487         if (dev->vol.migr_state) {
3488                 switch (migr_type(dev)) {
3489                 case MIGR_REPAIR:
3490                 case MIGR_INIT: {
3491                         __u64 blocks_per_unit = blocks_per_migr_unit(super,
3492                                                                      dev);
3493                         __u64 units = __le32_to_cpu(dev->vol.curr_migr_unit);
3494
3495                         info->resync_start = blocks_per_unit * units;
3496                         break;
3497                 }
3498                 case MIGR_GEN_MIGR: {
3499                         __u64 blocks_per_unit = blocks_per_migr_unit(super,
3500                                                                      dev);
3501                         __u64 units = current_migr_unit(migr_rec);
3502                         unsigned long long array_blocks;
3503                         int used_disks;
3504
3505                         if (__le32_to_cpu(migr_rec->ascending_migr) &&
3506                             (units <
3507                                 (get_num_migr_units(migr_rec)-1)) &&
3508                             (super->migr_rec->rec_status ==
3509                                         __cpu_to_le32(UNIT_SRC_IN_CP_AREA)))
3510                                 units++;
3511
3512                         info->reshape_progress = blocks_per_unit * units;
3513
3514                         dprintf("IMSM: General Migration checkpoint : %llu (%llu) -> read reshape progress : %llu\n",
3515                                 (unsigned long long)units,
3516                                 (unsigned long long)blocks_per_unit,
3517                                 info->reshape_progress);
3518
3519                         used_disks = imsm_num_data_members(prev_map);
3520                         if (used_disks > 0) {
3521                                 array_blocks = per_dev_array_size(map) *
3522                                         used_disks;
3523                                 info->custom_array_size =
3524                                         round_size_to_mb(array_blocks,
3525                                                          used_disks);
3526
3527                         }
3528                 }
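                /* fall through */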
3529                 case MIGR_VERIFY:
3530                         /* we could emulate the checkpointing of
3531                          * 'sync_action=check' migrations, but for now
3532                          * we just immediately complete them
3533                          */
3534                 case MIGR_REBUILD:
3535                         /* this is handled by container_content_imsm() */
3536                 case MIGR_STATE_CHANGE:
3537                         /* FIXME handle other migrations */
3538                 default:
3539                         /* we are not dirty, so... */
3540                         info->resync_start = MaxSector;
3541                 }
3542         }
3543
3544         strncpy(info->name, (char *) dev->volume, MAX_RAID_SERIAL_LEN);
3545         info->name[MAX_RAID_SERIAL_LEN] = 0;
3546
3547         info->array.major_version = -1;
3548         info->array.minor_version = -2;
3549         sprintf(info->text_version, "/%s/%d", st->container_devnm, info->container_member);
3550         info->safe_mode_delay = 4000;  /* 4 secs like the Matrix driver */
3551         uuid_from_super_imsm(st, info->uuid);
3552
3553         if (dmap) {
3554                 int i, j;
3555                 for (i=0; i<map_disks; i++) {
3556                         dmap[i] = 0;
3557                         if (i < info->array.raid_disks) {
3558                                 struct imsm_disk *dsk;
3559                                 j = get_imsm_disk_idx(dev, i, MAP_X);
3560                                 dsk = get_imsm_disk(super, j);
3561                                 if (dsk && (dsk->status & CONFIGURED_DISK))
3562                                         dmap[i] = 1;
3563                         }
3564                 }
3565         }
3566 }
3567
3568 static __u8 imsm_check_degraded(struct intel_super *super, struct imsm_dev *dev,
3569                                 int failed, int look_in_map);
3570
3571 static int imsm_count_failed(struct intel_super *super, struct imsm_dev *dev,
3572                              int look_in_map);
3573
3574 static void manage_second_map(struct intel_super *super, struct imsm_dev *dev)
3575 {
3576         if (is_gen_migration(dev)) {
3577                 int failed;
3578                 __u8 map_state;
3579                 struct imsm_map *map2 = get_imsm_map(dev, MAP_1);
3580
3581                 failed = imsm_count_failed(super, dev, MAP_1);
3582                 map_state = imsm_check_degraded(super, dev, failed, MAP_1);
3583                 if (map2->map_state != map_state) {
3584                         map2->map_state = map_state;
3585                         super->updates_pending++;
3586                 }
3587         }
3588 }
3589
3590 static struct imsm_disk *get_imsm_missing(struct intel_super *super, __u8 index)
3591 {
3592         struct dl *d;
3593
3594         for (d = super->missing; d; d = d->next)
3595                 if (d->index == index)
3596                         return &d->disk;
3597         return NULL;
3598 }
3599
3600 static void getinfo_super_imsm(struct supertype *st, struct mdinfo *info, char *map)
3601 {
3602         struct intel_super *super = st->sb;
3603         struct imsm_disk *disk;
3604         int map_disks = info->array.raid_disks;
3605         int max_enough = -1;
3606         int i;
3607         struct imsm_super *mpb;
3608
3609         if (super->current_vol >= 0) {
3610                 getinfo_super_imsm_volume(st, info, map);
3611                 return;
3612         }
3613         memset(info, 0, sizeof(*info));
3614
3615         /* Set raid_disks to zero so that Assemble will always pull in valid
3616          * spares
3617          */
3618         info->array.raid_disks    = 0;
3619         info->array.level         = LEVEL_CONTAINER;
3620         info->array.layout        = 0;
3621         info->array.md_minor      = -1;
3622         info->array.ctime         = 0; /* N/A for imsm */
3623         info->array.utime         = 0;
3624         info->array.chunk_size    = 0;
3625
3626         info->disk.major = 0;
3627         info->disk.minor = 0;
3628         info->disk.raid_disk = -1;
3629         info->reshape_active = 0;
3630         info->array.major_version = -1;
3631         info->array.minor_version = -2;
3632         strcpy(info->text_version, "imsm");
3633         info->safe_mode_delay = 0;
3634         info->disk.number = -1;
3635         info->disk.state = 0;
3636         info->name[0] = 0;
3637         info->recovery_start = MaxSector;
3638         info->recovery_blocked = imsm_reshape_blocks_arrays_changes(st->sb);
3639         info->bb.supported = 1;
3640
3641         /* do we have all the in-sync disks that we expect? */
3642         mpb = super->anchor;
3643         info->events = __le32_to_cpu(mpb->generation_num);
3644
3645         for (i = 0; i < mpb->num_raid_devs; i++) {
3646                 struct imsm_dev *dev = get_imsm_dev(super, i);
3647                 int failed, enough, j, missing = 0;
3648                 struct imsm_map *map;
3649                 __u8 state;
3650
3651                 failed = imsm_count_failed(super, dev, MAP_0);
3652                 state = imsm_check_degraded(super, dev, failed, MAP_0);
3653                 map = get_imsm_map(dev, MAP_0);
3654
3655                 /* any newly missing disks?
3656                  * (catches single-degraded vs double-degraded)
3657                  */
3658                 for (j = 0; j < map->num_members; j++) {
3659                         __u32 ord = get_imsm_ord_tbl_ent(dev, j, MAP_0);
3660                         __u32 idx = ord_to_idx(ord);
3661
3662                         if (super->disks && super->disks->index == (int)idx)
3663                                 info->disk.raid_disk = j;
3664
3665                         if (!(ord & IMSM_ORD_REBUILD) &&
3666                             get_imsm_missing(super, idx)) {
3667                                 missing = 1;
3668                                 break;
3669                         }
3670                 }
3671
3672                 if (state == IMSM_T_STATE_FAILED)
3673                         enough = -1;
3674                 else if (state == IMSM_T_STATE_DEGRADED &&
3675                          (state != map->map_state || missing))
3676                         enough = 0;
3677                 else /* we're normal, or already degraded */
3678                         enough = 1;
3679                 if (is_gen_migration(dev) && missing) {
3680                         /* during general migration we need all the disks
3681                          * that the process is running on.
3682                          * No new missing disks are allowed.
3683                          */
3684                         max_enough = -1;
3685                         enough = -1;
3686                         /* no more checks necessary */
3688                         break;
3689                 }
3690                 /* in the missing/failed disk case check to see
3691                  * if at least one array is runnable
3692                  */
3693                 max_enough = max(max_enough, enough);
3694         }
3695         dprintf("enough: %d\n", max_enough);
3696         info->container_enough = max_enough;
3697
3698         if (super->disks) {
3699                 __u32 reserved = imsm_reserved_sectors(super, super->disks);
3700
3701                 disk = &super->disks->disk;
3702                 info->data_offset = total_blocks(&super->disks->disk) - reserved;
3703                 info->component_size = reserved;
3704                 info->disk.state  = is_configured(disk) ? (1 << MD_DISK_ACTIVE) : 0;
3705                 /* we don't change info->disk.raid_disk here because
3706                  * this state will be finalized in mdmon after we have
3707                  * found the 'most fresh' version of the metadata
3708                  */
3709                 info->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
3710                 info->disk.state |= (is_spare(disk) || is_journal(disk)) ?
3711                                     0 : (1 << MD_DISK_SYNC);
3712         }
3713
3714         /* only call uuid_from_super_imsm when this disk is part of a populated container,
3715          * ->compare_super may have updated the 'num_raid_devs' field for spares
3716          */
3717         if (info->disk.state & (1 << MD_DISK_SYNC) || super->anchor->num_raid_devs)
3718                 uuid_from_super_imsm(st, info->uuid);
3719         else
3720                 memcpy(info->uuid, uuid_zero, sizeof(uuid_zero));
3721
3722         /* I don't know how to compute 'map' on imsm, so use safe default */
3723         if (map) {
3724                 int i;
3725                 for (i = 0; i < map_disks; i++)
3726                         map[i] = 1;
3727         }
3728
3729 }
3730
3731 /* allocates memory and fills in a disk entry in the mdinfo structure
3732  * for each disk in the array */
3733 struct mdinfo *getinfo_super_disks_imsm(struct supertype *st)
3734 {
3735         struct mdinfo *mddev;
3736         struct intel_super *super = st->sb;
3737         struct imsm_disk *disk;
3738         int count = 0;
3739         struct dl *dl;
3740         if (!super || !super->disks)
3741                 return NULL;
3742         dl = super->disks;
3743         mddev = xcalloc(1, sizeof(*mddev));
3744         while (dl) {
3745                 struct mdinfo *tmp;
3746                 disk = &dl->disk;
3747                 tmp = xcalloc(1, sizeof(*tmp));
3748                 if (mddev->devs)
3749                         tmp->next = mddev->devs;
3750                 mddev->devs = tmp;
3751                 tmp->disk.number = count++;
3752                 tmp->disk.major = dl->major;
3753                 tmp->disk.minor = dl->minor;
3754                 tmp->disk.state = is_configured(disk) ?
3755                                   (1 << MD_DISK_ACTIVE) : 0;
3756                 tmp->disk.state |= is_failed(disk) ? (1 << MD_DISK_FAULTY) : 0;
3757                 tmp->disk.state |= is_spare(disk) ? 0 : (1 << MD_DISK_SYNC);
3758                 tmp->disk.raid_disk = -1;
3759                 dl = dl->next;
3760         }
3761         return mddev;
3762 }
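
/* Usage sketch (illustrative): the caller owns the returned list and the
 * per-disk entries, so tearing it down looks like this.
 */
#if 0
        struct mdinfo *mddev = getinfo_super_disks_imsm(st);

        while (mddev && mddev->devs) {
                struct mdinfo *tmp = mddev->devs;

                mddev->devs = tmp->next;
                free(tmp);
        }
        free(mddev);
#endif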
3763
3764 static int update_super_imsm(struct supertype *st, struct mdinfo *info,
3765                              char *update, char *devname, int verbose,
3766                              int uuid_set, char *homehost)
3767 {
3768         /* For 'assemble' and 'force' we need to return non-zero if any
3769          * change was made.  For others, the return value is ignored.
3770          * Update options are:
3771          *  force-one : This device looks a bit old but needs to be included,
3772          *        update age info appropriately.
3773          *  assemble: clear any 'faulty' flag to allow this device to
3774          *              be assembled.
3775          *  force-array: Array is degraded but being forced, mark it clean
3776          *         if that will be needed to assemble it.
3777          *
3778          *  newdev:  not used ????
3779          *  grow:  Array has gained a new device - this is currently for
3780          *              linear only
3781          *  resync: mark as dirty so a resync will happen.
3782          *  name:  update the name - preserving the homehost
3783          *  uuid:  Change the uuid of the array to match what is given
3784          *
3785          * Following are not relevant for this imsm:
3786          *  sparc2.2 : update from old dodgy metadata
3787          *  super-minor: change the preferred_minor number
3788          *  summaries:  update redundant counters.
3789          *  homehost:  update the recorded homehost
3790          *  _reshape_progress: record new reshape_progress position.
3791          */
3792         int rv = 1;
3793         struct intel_super *super = st->sb;
3794         struct imsm_super *mpb;
3795
3796         /* we can only update container info */
3797         if (!super || super->current_vol >= 0 || !super->anchor)
3798                 return 1;
3799
3800         mpb = super->anchor;
3801
3802         if (strcmp(update, "uuid") == 0) {
3803                 /* We take this to mean that the family_num should be updated.
3804                  * However that is much smaller than the uuid so we cannot really
3805                  * allow an explicit uuid to be given.  And it is hard to reliably
3806                  * know if one was.
3807                  * So if !uuid_set we know the current uuid is random and we just use
3808                  * the first 'int' and copy it to the other 3 positions.
3809                  * Otherwise we require the 4 'int's to be the same as would be the
3810                  * case if we are using a random uuid.  So an explicit uuid will be
3811                  * accepted as long as all four ints are the same... which shouldn't hurt.
3812                  */
3813                 if (!uuid_set) {
3814                         info->uuid[1] = info->uuid[2] = info->uuid[3] = info->uuid[0];
3815                         rv = 0;
3816                 } else {
3817                         if (info->uuid[0] != info->uuid[1] ||
3818                             info->uuid[1] != info->uuid[2] ||
3819                             info->uuid[2] != info->uuid[3])
3820                                 rv = -1;
3821                         else
3822                                 rv = 0;
3823                 }
3824                 if (rv == 0)
3825                         mpb->orig_family_num = info->uuid[0];
3826         } else if (strcmp(update, "assemble") == 0)
3827                 rv = 0;
3828         else
3829                 rv = -1;
3830
3831         /* successful update? recompute checksum */
3832         if (rv == 0)
3833                 mpb->check_sum = __le32_to_cpu(__gen_imsm_checksum(mpb));
3834
3835         return rv;
3836 }
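
/* Example (illustrative): "--update=uuid" with
 * uuid 12345678:12345678:12345678:12345678 is accepted because all four
 * words match, and that word is stored in orig_family_num; a uuid with
 * differing words is rejected.
 */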
3837
3838 static size_t disks_to_mpb_size(int disks)
3839 {
3840         size_t size;
3841
3842         size = sizeof(struct imsm_super);
3843         size += (disks - 1) * sizeof(struct imsm_disk);
3844         size += 2 * sizeof(struct imsm_dev);
3845         /* up to 2 maps per raid device (-2 for imsm_maps in imsm_dev) */
3846         size += (4 - 2) * sizeof(struct imsm_map);
3847         /* 4 possible disk_ord_tbl's */
3848         size += 4 * (disks - 1) * sizeof(__u32);
3849         /* maximum bbm log */
3850         size += sizeof(struct bbm_log);
3851
3852         return size;
3853 }
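
/* Worked example (illustrative): for a 4-disk container the estimate is
 * sizeof(imsm_super) + 3 * sizeof(imsm_disk) + 2 * sizeof(imsm_dev) +
 * 2 * sizeof(imsm_map) + 4 * 3 * sizeof(__u32) + sizeof(struct bbm_log),
 * i.e. the worst case of two volumes with two maps each during migration.
 */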
3854
3855 static __u64 avail_size_imsm(struct supertype *st, __u64 devsize,
3856                              unsigned long long data_offset)
3857 {
3858         if (devsize < (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS))
3859                 return 0;
3860
3861         return devsize - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS);
3862 }
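
/* Worked example (illustrative): a member of 2097152 sectors can offer
 * 2097152 - (MPB_SECTOR_CNT + IMSM_RESERVED_SECTORS) sectors of data
 * space; a device smaller than the reserved area contributes nothing.
 */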
3863
3864 static void free_devlist(struct intel_super *super)
3865 {